Exemplo n.º 1
0
class Summary(object):
    """Convenience wrapper around a ``SummaryWriter`` for one training run.

    Event files are written to ``log/<summary_name>``.
    """

    def __init__(self, summary_name):
        '''
        Create the log directory path and the underlying writer.

        :param summary_name: name of the run; used as the sub-directory
            of ``log`` that receives the event files.
        '''
        self._logPath = os.path.join("log", summary_name)
        self._writer = SummaryWriter(self._logPath)

    def addTrainLoss(self, loss, epoch):
        """Log ``loss`` under the tag 'train loss' at step ``epoch``."""
        self._writer.add_scalar('train loss', loss, epoch)

    def addValLoss(self, loss, epoch):
        """Log ``loss`` under the tag 'val loss' at step ``epoch``."""
        self._writer.add_scalar('val loss', loss, epoch)

    def addLearningRate(self, lr, epoch):
        """Log ``lr`` under the tag 'learning rate' at step ``epoch``."""
        self._writer.add_scalar('learning rate', lr, epoch)

    def summaryEnd(self):
        """Export all scalars to ``all_scalars.json`` and close the writer.

        The JSON file is placed inside the run's log directory.  The writer
        is closed even when the export raises, so the event file handle is
        never leaked; the export error still propagates to the caller.
        """
        try:
            self._writer.export_scalars_to_json(
                os.path.join(self._logPath, "all_scalars.json"))
        finally:
            self._writer.close()

    def addPR_label_pred(self, label, prediction, epoch=None):
        """Log a precision-recall curve under the tag 'PR_curve'.

        :param label: ground-truth labels, forwarded unchanged.
        :param prediction: predicted scores, forwarded unchanged.
        :param epoch: optional step to attach to the curve; ``None``
            (the default) keeps the writer's own default step.
        """
        self._writer.add_pr_curve('PR_curve',
                                  label,
                                  prediction,
                                  global_step=epoch,
                                  num_thresholds=1000)
Exemplo n.º 2
0
    def add_pr_curve(self,
                     tag,
                     labels,
                     predictions,
                     global_step=None,
                     num_thresholds=127,
                     weights=None,
                     walltime=None):
        """Forward a precision-recall curve to ``SummaryWriter.add_pr_curve``.

        Nothing is written unless ``self.is_write`` is truthy.  All arguments
        are passed through positionally and unchanged; the method itself
        always returns ``None``.
        """
        if not self.is_write:
            return

        forwarded = (tag, labels, predictions, global_step, num_thresholds,
                     weights, walltime)
        SummaryWriter.add_pr_curve(self, *forwarded)
Exemplo n.º 3
0
def tb_train2():
    """Exercise most ``SummaryWriter`` logging calls with dummy data.

    For 100 iterations the function logs two random scalars and a scalar
    group; every tenth iteration it additionally logs an image grid, a
    two-second cosine tone, a text entry, histograms of every ResNet-18
    parameter and a random precision-recall curve.  Afterwards the first
    100 MNIST test images are logged as an embedding, all scalars are
    exported to ``./all_scalars.json`` and the writer is closed.

    Side effects: downloads MNIST into ``mnist`` and writes event files to
    the writer's default log directory.
    """
    import torchvision.utils as vutils
    import torchvision.models as models
    from torchvision import datasets

    resnet18 = models.resnet18(False)
    writer = SummaryWriter()
    sample_rate = 44100
    freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]
    # Sample indices for two seconds of audio.  Loop-invariant, so built once.
    # float64 keeps the phase accurate before the final cast to float32.
    sample_index = torch.arange(sample_rate * 2, dtype=torch.float64)

    for n_iter in range(100):
        dummy_s1 = torch.rand(1)
        dummy_s2 = torch.rand(1)
        # data grouping by `slash`
        writer.add_scalar('data/scalar1', dummy_s1[0], n_iter)
        writer.add_scalar('data/scalar2', dummy_s2[0], n_iter)

        writer.add_scalars('data/scalar_group', {'xsinx': n_iter * np.sin(n_iter),
                                                 'xcosx': n_iter * np.cos(n_iter),
                                                 'arctanx': np.arctan(n_iter)}, n_iter)

        dummy_img = torch.rand(32, 3, 64, 64)  # output from network
        if n_iter % 10 == 0:
            x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
            writer.add_image('Image', x, n_iter)

            # Fill the WHOLE audio buffer.  The previous loop ran over
            # ``x.size(0)`` -- the channel count of the image grid -- so only
            # the first three samples were ever written and the clip was
            # effectively silent.
            # amplitude of sound should be in [-1, 1]
            dummy_audio = torch.cos(
                freqs[n_iter // 10] * np.pi * sample_index /
                float(sample_rate)).float()
            writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)

            writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)

            for name, param in resnet18.named_parameters():
                writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)

            # needs tensorboard 0.4RC or later
            writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

    dataset = datasets.MNIST('mnist', train=False, download=True)
    images = dataset.test_data[:100].float()
    label = dataset.test_labels[:100]

    # Flatten each 28x28 image into a 784-dimensional feature vector.
    features = images.view(100, 784)
    writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

    # export scalar data to JSON for external processing
    writer.export_scalars_to_json("./all_scalars.json")
    writer.close()
Exemplo n.º 4
0
    def compute_and_plot(self, tb_writer: tfx.SummaryWriter = None, itr=0):
        """Compute ROC and precision-recall curves from ``self.gt`` / ``self.pred``.

        When ``tb_writer`` is truthy, each curve is also written point by
        point through ``add_scalars`` under the main tag 'Graphs': the y
        value (scaled by 100) is the scalar and the x value (scaled by 100)
        is passed as the third positional argument.  A native PR curve is
        written with ``add_pr_curve`` at step ``itr``.

        :param tb_writer: optional summary writer; nothing is logged without it.
        :param itr: step attached to the native PR curve.
        :return: ``(precision, recall, thresholds)`` as returned by
            ``precision_recall_curve``.
        """
        ground_truth = np.array(self.gt)
        scores = np.array(self.pred)

        fpr, tpr, _ = roc_curve(ground_truth, scores)
        if tb_writer:
            for false_pos_rate, true_pos_rate in zip(fpr, tpr):
                tb_writer.add_scalars('Graphs', {'ROC': true_pos_rate * 100},
                                      false_pos_rate * 100)
            tb_writer.flush()

        p, r, th = precision_recall_curve(ground_truth, scores)
        if tb_writer:
            tb_writer.add_pr_curve('pr_curve', ground_truth, scores, itr)
            for precision, recall in zip(p, r):
                tb_writer.add_scalars('Graphs', {'PR-Curve': precision * 100},
                                      recall * 100)
            tb_writer.flush()

        return p, r, th
Exemplo n.º 5
0
class Experiment:
    """Wire together a dataset, a model and a summary writer for one run.

    Construction immediately loads everything (``load``) and persists the
    configuration and model description (``save``).  ``run`` then trains for
    ``config.EPOCH_SIZE`` epochs and exports the logged scalars.
    """

    # File name of the scalar export written into ``config.EXPERIMENT_DIR``.
    SCALARS_FILENAME = 'all_scalars.json'

    def __init__(self, config):
        """Store ``config``, then build and persist the experiment."""
        self.config = config

        self.load()
        self.save()

    def load(self):
        """Create the dataset, both data loaders, the model and the writer."""
        self.dataset = dataset_by_name(self.config.DATASET_NAME)(
            config=self.config)  # MNISTDataset, IndicatorDataset, LoadDataset

        self.train_dataloader = DataLoader(
            self.dataset.train_dataset,
            batch_size=self.config.TRAIN_BATCH_SIZE,
            shuffle=self.config.TRAIN_SHUFFLE,
            drop_last=True)
        self.valid_dataloader = DataLoader(
            self.dataset.valid_dataset,
            batch_size=self.config.VALID_BATCH_SIZE,
            shuffle=self.config.VALID_SHUFFLE,
            drop_last=True)

        MODEL = class_by_name(self.config.MODEL_NAME)  # CNN, LSTM
        self.model = MODEL(config=self.config).to(self.config.DEVICE)

        # Event files go to ``<EXPERIMENT_DIR>/summary``.
        self.writer = SummaryWriter(
            log_dir=os.path.join(self.config.EXPERIMENT_DIR, 'summary'))

    def save(self):
        """Persist the config and dump the model as ONNX and text."""
        self.config.save()
        self.model.to_onnx(directory=self.config.EXPERIMENT_DIR)
        self.model.to_txt(directory=self.config.EXPERIMENT_DIR)

    def run_epoch(self, epoch):
        """Train and validate for one epoch and log the results.

        :param epoch: epoch index; used as the global step of every entry.
        """
        # Fit the model
        training_loss = self.model.fit(dataloader=self.train_dataloader).item()

        # Validate validation set
        validation_loss = self.model.validate(
            dataloader=self.valid_dataloader).item()

        # Predict on 16 random samples; the index of the maximum along
        # dim 1 is taken as the predicted label.
        images, labels = self.dataset.random_sample(n=16)
        prediction_logprob = self.model.predict(xs=images)[0].cpu().detach()
        predicted_labels = prediction_logprob.max(
            1, keepdim=True)[1].numpy().flatten()

        # Write losses to the tensorboard
        self.writer.add_scalar('training_loss', training_loss, epoch)
        self.writer.add_scalar('validation_loss', validation_loss, epoch)

        # Write random image to the summary writer.  The tag embeds the true
        # and the predicted labels, each joined with '.'.
        image_grid = torchvision.utils.make_grid(images,
                                                 normalize=True,
                                                 scale_each=True)
        self.writer.add_image(tag="RandomSample y-{} yhat{}".format(
            '.'.join(map(str, labels)), '.'.join(map(str, predicted_labels))),
                              img_tensor=image_grid,
                              global_step=epoch)

        # Write PR Curve to the summary writer.
        # NOTE: labels and scores here are random placeholders, not model
        # output, so the curve carries no information about the model.
        # needs tensorboard 0.4RC or later
        self.writer.add_pr_curve('xoxo', np.random.randint(2, size=100),
                                 np.random.rand(100), epoch)

    def run(self):
        """Run every epoch, then export the scalars and close the writer."""
        with trange(0, self.config.EPOCH_SIZE) as t:
            for epoch in t:
                self.run_epoch(epoch=epoch)

        self._export_scalars()

    def _export_scalars(self):
        """Write all logged scalars to a JSON file and close the writer.

        ``export_scalars_to_json`` expects a file path.  Passing the bare
        experiment directory (as was done before) cannot produce a file, so
        the export goes to ``<EXPERIMENT_DIR>/all_scalars.json``.  The writer
        is closed even if the export fails.
        """
        scalars_path = os.path.join(self.config.EXPERIMENT_DIR,
                                    self.SCALARS_FILENAME)
        try:
            self.writer.export_scalars_to_json(scalars_path)
        finally:
            self.writer.close()
Exemplo n.º 6
0
class FSLDiscriminatorAgent(BaseAgent):
    def __init__(self, config):
        """Build the models, loss, optimizer, counters and summary writer.

        :param config: experiment configuration.  This constructor reads
            ``cuda``, ``seed``, ``gpu_device``, ``image_size``,
            ``learning_rate``, ``momentum``, ``batch_size`` and
            ``summary_dir`` from it.
        """
        super().__init__(config)
        print(torch.__version__)
        # define models
        self.gen_model = GenerativeFSL_CAEModel()
        self.model = EncoderModel(self.config)
        # define loss
        self.loss = nn.MSELoss()  # nn.NLLLoss()

        # set cuda flag
        self.is_cuda = torch.cuda.is_available()
        if self.is_cuda and not self.config.cuda:
            self.logger.info(
                "WARNING: You have a CUDA device, so you should probably enable CUDA"
            )

        self.cuda = self.is_cuda & self.config.cuda

        # Seed every random number generator regardless of the device.
        # Previously ``random``, NumPy and the torch CPU generator were only
        # seeded in the CPU branch, so GPU runs were not reproducible.
        self.manual_seed = self.config.seed
        random.seed(self.manual_seed)
        np.random.seed(self.manual_seed)
        torch.manual_seed(self.manual_seed)
        if self.cuda:
            torch.cuda.manual_seed(self.manual_seed)
            torch.cuda.manual_seed_all(self.manual_seed)
            self.device = torch.device("cuda")
            torch.cuda.set_device(self.config.gpu_device)
            self.model = self.model.to(self.device)
            self.loss = self.loss.to(self.device)
            self.logger.info("Program will run on *****GPU-CUDA***** ")
            print_cuda_statistics()
        else:
            self.device = torch.device("cpu")
            torch.cuda.manual_seed_all(self.manual_seed)
            # NOTE(review): these cuDNN flags are only set on the CPU path,
            # where cuDNN is presumably not used -- confirm whether they
            # were meant for the GPU path instead.
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            self.logger.info("Program will run on *****CPU*****\n")
        summary(self.model,
                input_size=(3, self.config.image_size, self.config.image_size))

        # define optimizer
        self.optimizer = optim.RMSprop(self.model.parameters(),
                                       alpha=0.99,
                                       lr=self.config.learning_rate,
                                       eps=1e-08,
                                       weight_decay=0,
                                       momentum=self.config.momentum)
        # optim.SGD(self.model.parameters(), lr=self.config.learning_rate, momentum=self.config.momentum)

        # initialize counter
        self.current_epoch = 0
        self.current_iteration = 0
        self.best_metric = 0
        self.best_train_loss = 999999999999
        # Fixed random batch of image-shaped noise (batch, 3, size, size).
        self.fixed_noise = Variable(
            torch.randn(self.config.batch_size, 3, self.config.image_size,
                        self.config.image_size))
        # Summary Writer
        self.summary_writer = SummaryWriter(
            log_dir=self.config.summary_dir,
            comment='GenerativeFSL Covid Prediction')

    def save_checkpoint(self,
                        filename='discriminator_checkpoint.pth.tar',
                        is_best=False):
        """
        Saving the latest checkpoint of the training.

        The state is written to
        ``<checkpoint_dir><target_domain>_<checkpoint_file>`` and then copied
        to a second file whose name is prefixed with the current epoch.

        :param filename: kept for interface compatibility; the file name is
            derived from the configuration and this argument is not used
        :param is_best: flag; when true the checkpoint is additionally copied
            to a file prefixed with ``BestModel_<epoch>``
        :return:
        """
        domain_checkpoint_file = self.config.target_domain + '_' + self.config.checkpoint_file
        state = {
            'epoch': self.current_epoch,
            'iteration': self.current_iteration,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
        }

        self.logger.info(
            "Checkpoint saving  from '{}' at (epoch {}) at (iteration {})\n".
            format(self.config.checkpoint_dir, state['epoch'],
                   state['iteration']))
        # Save the state.  Paths are built by plain concatenation, so
        # ``checkpoint_dir`` has to end with a path separator.
        checkpoint_path = self.config.checkpoint_dir + domain_checkpoint_file
        torch.save(state, checkpoint_path)
        # Keep a per-epoch copy next to the "latest" checkpoint.
        shutil.copyfile(
            checkpoint_path,
            self.config.checkpoint_dir + str(state['epoch']) +
            domain_checkpoint_file)

        # If it is the best, copy it to another file prefixed 'BestModel_'
        if is_best:
            shutil.copyfile(
                checkpoint_path,
                self.config.checkpoint_dir + 'BestModel_' +
                str(state['epoch']) + domain_checkpoint_file)

    def load_checkpoint(self, filename):
        """Restore model, optimizer and counters from a checkpoint file.

        :param filename: file name relative to ``config.checkpoint_dir``
            (joined by plain concatenation).

        A checkpoint that cannot be opened (``OSError``) is not an error:
        it is logged and the agent keeps its freshly initialised state.
        After a successful load, training resumes at the epoch following
        the stored one.
        """
        filename = self.config.checkpoint_dir + filename
        self.logger.info(
            "******Loading checkpoint '{}' from dir {}".format(
                filename, self.config.checkpoint_dir))
        try:
            # map_location lets a checkpoint written on a GPU be restored on
            # a CPU-only host (and vice versa).
            # NOTE: torch.load unpickles the file; only load trusted files.
            checkpoint = torch.load(filename, map_location=self.device)
        except OSError:
            self.logger.info(
                "No checkpoint exists from '{}'. Skipping...".format(
                    self.config.checkpoint_dir))
            self.logger.info("**First time to train**")
            return

        self.logger.info("********Loaded checkpoint '{}'".format(filename))
        self.current_epoch = checkpoint['epoch'] + 1
        self.current_iteration = checkpoint['iteration']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.logger.info(
            "Checkpoint loaded successfully from '{}' at (epoch {}) at (iteration {})\n"
            .format(self.config.checkpoint_dir, checkpoint['epoch'],
                    checkpoint['iteration']))

    def set_parameter_requires_grad(self, model, feature_extract):
        """Freeze the parameters of the first child module of ``model``.

        :param model: module whose ``children()`` are walked in order.
        :param feature_extract: when falsy nothing is frozen.

        Every child is printed; only children whose 1-based position is
        below the limit of 2 (i.e. the first one) get
        ``requires_grad = False`` on all their parameters.
        """
        print("############Setting grad")
        if not feature_extract:
            return

        freeze_below = 2
        for position, child in enumerate(model.children(), start=1):
            print("child", child)
            if position >= freeze_below:
                continue
            for param in child.parameters():
                param.requires_grad = False

    def load_source_model(self):
        """Load the pre-trained source-domain weights into ``self.model``.

        The checkpoint is read from
        ``model_repo/<source_domain>genfsl_checkpoint.pth.tar``.  Weights are
        loaded non-strictly, the epoch/iteration counters are reset to 0 and
        the first child of the model is frozen via
        ``set_parameter_requires_grad``.

        :return: ``True`` when the checkpoint was loaded, ``False`` when the
            file could not be opened (``OSError``).
        """
        domain_name = self.config.source_domain
        self.logger.info(
            "******Loading source model for domain '{}'".format(
                domain_name))
        filename = os.path.join("model_repo",
                                domain_name + "genfsl_checkpoint.pth.tar")
        try:
            # map_location lets a checkpoint written on a GPU be restored on
            # a CPU-only host (and vice versa).
            # NOTE: torch.load unpickles the file; only load trusted files.
            checkpoint = torch.load(filename, map_location=self.device)
        except OSError:
            self.logger.info(
                "No model checkpoint {} exists for source domain {}. Skipping..."
                .format(filename, domain_name))
            return False

        self.logger.info("********Loaded checkpoint '{}'".format(filename))
        self.current_epoch = 0
        self.current_iteration = 0
        # strict=False: keys missing from or unknown to the model are ignored.
        self.model.load_state_dict(checkpoint['state_dict'], strict=False)
        self.logger.info(
            "Checkpoint loaded successfully from '{}' at (epoch {}) at (iteration {})\n"
            .format(self.config.checkpoint_dir, checkpoint['epoch'],
                    checkpoint['iteration']))
        self.set_parameter_requires_grad(self.model, feature_extract=True)
        return True

    def load_model(self, domain_name):
        """Load the fine-tuned model used for testing.

        The checkpoint is read from
        ``tuned_model_repo/<config.tuned_model_name>``; weights are loaded
        non-strictly and the epoch/iteration counters are reset to 0.

        :param domain_name: only used in log messages.
        :return: ``True`` when the checkpoint was loaded, ``False`` when the
            file could not be opened (``OSError``).
        """
        self.logger.info(
            "*Loading  trained generative FSL model  for domain '{}' for testing only"
            .format(domain_name))
        filename = os.path.join("tuned_model_repo",
                                self.config.tuned_model_name)
        try:
            # map_location lets a checkpoint written on a GPU be restored on
            # a CPU-only host (and vice versa).
            # NOTE: torch.load unpickles the file; only load trusted files.
            checkpoint = torch.load(filename, map_location=self.device)
        except OSError:
            self.logger.info(
                "No model checkpoint exists for target domain {}. Skipping...".
                format(domain_name))
            return False

        self.logger.info("********Loaded checkpoint '{}'".format(filename))
        self.current_epoch = 0
        self.current_iteration = 0
        # strict=False: keys missing from or unknown to the model are ignored.
        self.model.load_state_dict(checkpoint['state_dict'], strict=False)
        self.logger.info(
            "Checkpoint loaded successfully from '{}' at (epoch {}) at (iteration {})\n"
            .format(self.config.checkpoint_dir, checkpoint['epoch'],
                    checkpoint['iteration']))
        return True

    def run(self):
        """
        Entry point of the agent.

        Runs ``validate_target_domain`` when ``config.mode`` equals 'test'
        and ``train_target_domain`` otherwise.  A ``KeyboardInterrupt`` is
        caught and logged instead of propagating.
        :return:
        """
        try:
            entry_point = (self.validate_target_domain
                           if self.config.mode == 'test' else
                           self.train_target_domain)
            entry_point()
        except KeyboardInterrupt:
            self.logger.info("You have entered CTRL+C.. Wait to finalize")

    def train_target_domain(self):
        """
        Fine-tune the model on the domain named by ``config.target_domain``.
        :return:
        """
        self.train_model(self.config.target_domain)

    def train_model(self, domain_name):
        """Fine-tune on ``domain_name`` starting from the source model.

        Training only happens when ``load_source_model`` reports success;
        otherwise the method returns without doing anything further.  A
        ``KeyboardInterrupt`` is caught and logged.
        """
        banner = "Fine-tuning.....Target {}, Source {} ".format(
            domain_name, self.config.source_domain)
        self.logger.info(banner)
        try:
            if not self.load_source_model():
                return
            self.data_loader = TargetDataLoader(config=self.config)
            self.train(domain_name)
        except KeyboardInterrupt:
            self.logger.info("You have entered CTRL+C.. Wait to finalize")

    def validate_target_domain(self):
        """
        Run ``test_model``, passing ``config.source_domain`` as its
        ``domain_name`` argument.
        :return:
        """
        self.test_model(self.config.source_domain)

    def test_model(self, domain_name):
        """Evaluate the tuned model and append the metrics to a CSV file.

        :param domain_name: logged as the source domain; the model itself is
            loaded for ``config.target_domain``.

        When ``config.thresholding`` equals ``False`` a single row for
        threshold 0.5 is written; otherwise one row per threshold in
        0.1, 0.2, ..., 0.9.  Rows are appended to
        ``config.results_file_name`` after a header line.  Nothing is
        written when the model cannot be loaded.  A ``KeyboardInterrupt``
        is caught and logged.
        """
        self.logger.info("Testing.....Source {}, Target {} ".format(
            domain_name, self.config.target_domain))
        try:
            if not self.load_model(self.config.target_domain):
                return
            self.data_loader = TargetDataLoader(config=self.config)
            fieldnames = [
                'Threshold', 'Confusion_Matrix', 'Sensitivity',
                'Specificity', 'F1', 'Accuracy'
            ]
            # newline='' is what the csv module requires of its file object;
            # without it, fields with embedded newlines and line endings on
            # some platforms are written incorrectly.  The ``with`` block
            # closes the file, so no explicit close() is needed.
            with open(self.config.results_file_name, mode='a+',
                      newline='') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                writer.writeheader()
                if self.config.thresholding == False:  # noqa: E712
                    thresholds = [0.5]
                else:
                    thresholds = np.linspace(0.1, 0.9, 9)
                for threshold in thresholds:
                    row, _ = self.validate(threshold)
                    writer.writerow(row)
        except KeyboardInterrupt:
            self.logger.info("You have entered CTRL+C.. Wait to finalize")

    def train(self, domain_name):
        """
        Main training function, with per-epoch model saving.

        :param domain_name: target domain; used in log messages and to build
            the name of the checkpoint to resume from.

        Training resumes from the latest checkpoint when one exists and then
        runs ``config.max_epoch`` further epochs.  After every epoch a
        checkpoint is saved and flagged as best when the epoch's training
        loss is the lowest seen so far.
        """
        summary(self.model,
                input_size=(3, self.config.image_size, self.config.image_size))
        # weight=class_weights)  # MSELoss()#BCE_KLDLoss(self.model)
        self.criterion = nn.CrossEntropyLoss()
        self.logger.info(
            "Training linear layers for generative FSL in {} domain".format(
                domain_name))
        # Model Loading from the latest checkpoint if not found start from scratch.
        # The name must match what save_checkpoint writes, i.e.
        # "<domain>_<checkpoint_file>".  Without the underscore the saved
        # checkpoint was never found and training always restarted.
        domain_checkpoint_file = domain_name + '_' + self.config.checkpoint_file
        self.logger.info(
            "LOADING {}....................".format(domain_checkpoint_file))

        self.load_checkpoint(domain_checkpoint_file)
        for epoch in range(self.current_epoch,
                           self.current_epoch + self.config.max_epoch):
            self.current_epoch = epoch
            train_loss = self.train_one_epoch(domain_name)
            is_best = train_loss < self.best_train_loss
            if is_best:
                self.best_train_loss = train_loss
            self.save_checkpoint(is_best=is_best)

    def train_one_epoch(self, domain_name):
        """
        One epoch of training.

        :param domain_name: suffix of the scalar tag the loss is logged under.
        :return: ``epoch_lossD.val`` after the last batch.
            NOTE(review): presumably ``val`` is the most recent batch loss
            rather than the epoch average -- confirm against AverageMeter.
        """
        self.model.train()
        epoch_lossD = AverageMeter()
        for batch_idx, data in enumerate(self.data_loader.train_loader):
            # credit assignment
            self.optimizer.zero_grad()  # clear the gradients
            imgs, labels = data
            imgs = imgs.to(self.device)
            # The targets must live on the same device as the predictions,
            # otherwise the loss computation fails when running on CUDA.
            labels = labels.to(self.device)
            predicted_labels = self.model(imgs)
            loss = self.criterion(predicted_labels, labels)
            loss.backward()
            # update model weights
            self.optimizer.step()
            epoch_lossD.update(loss.item())
            self.logger.info(batch_idx)
            if batch_idx % self.config.log_interval == 0:
                self.logger.info(
                    'Last Layers Training Epoch: {} [{}/{} ({:.0f}%)] Loss: {:6f}'
                    .format(
                        self.current_epoch, batch_idx * self.config.batch_size,
                        len(self.data_loader.train_loader.dataset),
                        100. * (batch_idx * self.config.batch_size /
                                len(self.data_loader.train_loader.dataset)),
                        loss.item()))
            self.current_iteration += 1
            # Logged once per batch, with the iteration counter as the step.
            self.summary_writer.add_scalar(
                "epoch/Training_Loss_" + domain_name, epoch_lossD.val,
                self.current_iteration)

        # self.visualize_one_epoch()

        self.logger.info("Training linear layers at epoch-" +
                         str(self.current_epoch) + " | " +
                         " - Training Loss-: " + str(epoch_lossD.val))
        return epoch_lossD.val

    def visualize_one_epoch(self):
        """
        Run the model over the test loader and print each output's size.

        The model is put into eval mode and gradients are disabled.  The
        second element of every batch is unpacked but not used.
        :return:
        """
        self.model.eval()
        with torch.no_grad():
            for data in self.data_loader.test_loader:
                testimgs, _ = data
                batch_output = self.model(testimgs.to(self.device))
                print(list(batch_output.size()))

    def add_pr_curve_tensorboard(self,
                                 class_index,
                                 test_probs,
                                 test_preds,
                                 global_step=0):
        '''
        Log the precision-recall curve of one class to the summary writer.

        :param class_index: class whose curve is plotted.
        :param test_probs: indexed as ``test_probs[:, class_index]`` to get
            the scores -- presumably shaped (samples, classes); confirm
            against the caller.
        :param test_preds: compared element-wise with ``class_index`` to
            build the boolean labels of the curve.
        :param global_step: step attached to the curve.

        The curve is always written under the tag 'PR for Covid prediction',
        whatever ``class_index`` is.
        '''
        is_selected_class = test_preds == class_index
        selected_class_probs = test_probs[:, class_index]
        self.summary_writer.add_pr_curve('PR for Covid prediction',
                                         is_selected_class,
                                         selected_class_probs,
                                         global_step=global_step)

    def validate(self, threshold=0.5):
        """
        One cycle of model validation over ``data_loader.test_loader``.

        The model output (two columns per sample) is turned into
        probabilities with a softmax over dim 1.  A sample is predicted as
        class 1 when the probability of class 1 exceeds ``threshold`` and as
        class 0 otherwise.

        :param threshold: decision threshold on the class-1 probability.
        :return: ``(results, test_loss)`` where ``results`` is a dict with
            the keys 'Threshold', 'Confusion_Matrix', 'Sensitivity',
            'Specificity', 'F1' and 'Accuracy', and ``test_loss`` is always
            0 (no loss is accumulated here).
        """
        self.criterion = nn.CrossEntropyLoss()
        self.model.eval()
        test_loss = 0
        y_true = []
        y_pred = []
        with torch.no_grad():
            for images, labels in self.data_loader.test_loader:
                y_true.extend(labels.flatten().tolist())
                # Inputs must be on the model's device.
                output = self.model(images.to(self.device))  # [B,2]
                probabilities = torch.softmax(output, dim=1)
                # Threshold the positive-class probability directly.  The
                # previous argmax over the boolean mask ``output > threshold``
                # returned class 0 whenever both entries were above (or both
                # below) the threshold, so thresholds under 0.5 behaved like
                # ``1 - threshold``.
                positive = probabilities[:, 1] > threshold
                # Plain Python ints, so sklearn also works with CUDA outputs.
                y_pred.extend(positive.long().cpu().tolist())
        print("Threshold", threshold)
        # labels=[0, 1] guarantees a 2x2 matrix even when only one class
        # occurs, so the four-way unpacking below cannot fail.
        cf = sklearn.metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
        tn, fp, fn, tp = cf.ravel()
        print("CF", cf)
        print("confusion matrix ", tn, fp, fn, tp)
        sensitivity = tp / (tp + fn)
        specificity = tn / (tn + fp)
        p = sklearn.metrics.precision_score(y_true, y_pred)
        print("PRECISION", p)
        print("computed PRECISION", tp / (tp + fp))
        r = sklearn.metrics.recall_score(y_true, y_pred)
        print("recall", r)
        print("computed recall", tp / (tp + fn))
        f1 = sklearn.metrics.f1_score(y_true, y_pred, average="binary")
        print("F1", f1)
        acc = sklearn.metrics.accuracy_score(y_true, y_pred)
        print("acc", acc)
        print("sensitivity {},specificity {}".format(sensitivity, specificity))
        results = {
            'Threshold': threshold,
            'Confusion_Matrix': cf,
            'Sensitivity': sensitivity,
            'Specificity': specificity,
            'F1': f1,
            'Accuracy': acc
        }

        return results, test_loss

    def finalize(self):
        """
        Finalize the run: export the logged scalars and close the writer.

        Scalars are written to ``all_scalars.json`` inside
        ``config.summary_dir``.  The path is joined with ``os.path.join`` so
        it is correct whether or not ``summary_dir`` ends with a separator.
        The writer is closed even when the export fails; the export error
        still propagates.
        :return:
        """
        self.logger.info(
            "Please wait while finalizing the operation.. Thank you")
        #self.save_checkpoint()
        scalars_path = os.path.join(self.config.summary_dir,
                                    "all_scalars.json")
        try:
            self.summary_writer.export_scalars_to_json(scalars_path)
        finally:
            self.summary_writer.close()
Exemplo n.º 7
0
                prec[:, j], recall[:, j] = (tp + 1e-10) / (
                    y_temp.sum(dim=-1).sum(dim=-1) + 1e-10), (tp + 1e-10) / (
                        mask.sum(dim=-1).sum(dim=-1) + 1e-10)
            # (batch, threshold)
            precs.append(prec)
            recalls.append(recall)

        prec = torch.cat(precs, dim=0).mean(dim=0)
        recall = torch.cat(recalls, dim=0).mean(dim=0)
        f_score = (1 + beta_square) * prec * recall / (beta_square * prec +
                                                       recall)
        thlist = torch.linspace(0, 1 - 1e-10, 256)
        print("Max F_score :", torch.max(f_score))
        print("Max_F_threshold :", thlist[torch.argmax(f_score)])
        if args.logdir is not None:
            writer.add_scalar("Max F_score",
                              torch.max(f_score),
                              global_step=model_iter)
            writer.add_scalar("Max_F_threshold",
                              thlist[torch.argmax(f_score)],
                              global_step=model_iter)
        pred = torch.cat(preds, 0)
        mask = torch.cat(masks, 0).round().float()
        if args.logdir is not None:
            writer.add_pr_curve('PR_curve', mask, pred, global_step=model_iter)
            writer.add_scalar('MAE',
                              torch.mean(torch.abs(pred - mask)),
                              global_step=model_iter)
        print("MAE :", torch.mean(torch.abs(pred - mask)))
        # Measure method from https://github.com/AceCoooool/DSS-pytorch solver.py
Exemplo n.º 8
0
class Summarizer(object):
    """Switchable proxy in front of a ``SummaryWriter``.

    Every ``add_*`` method returns immediately while ``self.report`` is
    falsy, so call sites can log unconditionally.  When a call leaves
    ``global_step`` as ``None``, ``self.global_step`` is forwarded instead.
    ``self.writer`` starts out as ``None``; ``initialize_writer`` has to be
    called before reporting is switched on.
    """

    def __init__(self):
        # Reporting is off until a caller sets ``report`` to a truthy value.
        self.report = False
        # Fallback step used when an ``add_*`` call does not pass one.
        self.global_step = None
        # Created by ``initialize_writer``.
        self.writer = None

    def initialize_writer(self, log_dir):
        """Create the underlying ``SummaryWriter`` for ``log_dir``."""
        self.writer = SummaryWriter(log_dir)

    def _resolve_step(self, global_step):
        """Return ``global_step``, or ``self.global_step`` when it is None."""
        if global_step is None:
            return self.global_step
        return global_step

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None):
        """Forward to ``writer.add_scalar`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_scalar(tag,
                               scalar_value,
                               global_step=self._resolve_step(global_step),
                               walltime=walltime)

    def add_scalars(self,
                    main_tag,
                    tag_scalar_dict,
                    global_step=None,
                    walltime=None):
        """Forward to ``writer.add_scalars`` when reporting is enabled."""
        if not self.report:
            return

        # ``self.writer.add_scalars`` is already bound: passing ``self`` as an
        # extra positional argument shifted every parameter by one and
        # collided with the ``global_step`` keyword.
        self.writer.add_scalars(main_tag,
                                tag_scalar_dict,
                                global_step=self._resolve_step(global_step),
                                walltime=walltime)

    def add_histogram(self,
                      tag,
                      values,
                      global_step=None,
                      bins='tensorflow',
                      walltime=None):
        """Forward to ``writer.add_histogram`` when reporting is enabled.

        GPU arrays are copied to host memory with ``chainer.cuda.to_cpu``
        before being handed to the writer.
        """
        if not self.report:
            return

        # ``chainer.cuda.ndarray`` is used for the type test instead of
        # ``chainer.cuda.cupy.ndarray`` so the check does not depend on the
        # ``cupy`` attribute being a real module.
        # NOTE(review): relies on chainer exporting ``cuda.ndarray`` even on
        # CPU-only installs -- confirm against the installed chainer version.
        if isinstance(values, chainer.cuda.ndarray):
            values = chainer.cuda.to_cpu(values)

        self.writer.add_histogram(tag,
                                  values,
                                  global_step=self._resolve_step(global_step),
                                  bins=bins,
                                  walltime=walltime)

    def add_image(self, tag, img_tensor, global_step=None, walltime=None):
        """Forward to ``writer.add_image`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_image(tag,
                              img_tensor,
                              global_step=self._resolve_step(global_step),
                              walltime=walltime)

    def add_image_with_boxes(self,
                             tag,
                             img_tensor,
                             box_tensor,
                             global_step=None,
                             walltime=None,
                             **kwargs):
        """Forward to ``writer.add_image_with_boxes`` when reporting is enabled.

        Extra keyword arguments are passed through unchanged.
        """
        if not self.report:
            return

        self.writer.add_image_with_boxes(
            tag,
            img_tensor,
            box_tensor,
            global_step=self._resolve_step(global_step),
            walltime=walltime,
            **kwargs)

    def add_figure(self,
                   tag,
                   figure,
                   global_step=None,
                   close=True,
                   walltime=None):
        """Forward to ``writer.add_figure`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_figure(tag,
                               figure,
                               global_step=self._resolve_step(global_step),
                               close=close,
                               walltime=walltime)

    def add_video(self,
                  tag,
                  vid_tensor,
                  global_step=None,
                  fps=4,
                  walltime=None):
        """Forward to ``writer.add_video`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_video(tag,
                              vid_tensor,
                              global_step=self._resolve_step(global_step),
                              fps=fps,
                              walltime=walltime)

    def add_audio(self,
                  tag,
                  snd_tensor,
                  global_step=None,
                  sample_rate=44100,
                  walltime=None):
        """Forward to ``writer.add_audio`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_audio(tag,
                              snd_tensor,
                              global_step=self._resolve_step(global_step),
                              sample_rate=sample_rate,
                              walltime=walltime)

    def add_text(self, tag, text_string, global_step=None, walltime=None):
        """Forward to ``writer.add_text`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_text(tag,
                             text_string,
                             global_step=self._resolve_step(global_step),
                             walltime=walltime)

    def add_graph_onnx(self, prototxt):
        """Forward to ``writer.add_graph_onnx`` when reporting is enabled."""
        if not self.report:
            return

        # Bound method: only ``prototxt`` is passed (the original also passed
        # ``self``, which handed the writer one argument too many).
        self.writer.add_graph_onnx(prototxt)

    def add_graph(self, model, input_to_model=None, verbose=False, **kwargs):
        """Forward to ``writer.add_graph`` when reporting is enabled.

        Extra keyword arguments are passed through unchanged.
        """
        if not self.report:
            return

        self.writer.add_graph(model,
                              input_to_model=input_to_model,
                              verbose=verbose,
                              **kwargs)

    def add_embedding(self,
                      mat,
                      metadata=None,
                      label_img=None,
                      global_step=None,
                      tag='default',
                      metadata_header=None):
        """Forward to ``writer.add_embedding`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_embedding(mat,
                                  metadata=metadata,
                                  label_img=label_img,
                                  global_step=self._resolve_step(global_step),
                                  tag=tag,
                                  metadata_header=metadata_header)

    def add_pr_curve(self,
                     tag,
                     labels,
                     predictions,
                     global_step=None,
                     num_thresholds=127,
                     weights=None,
                     walltime=None):
        """Forward to ``writer.add_pr_curve`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_pr_curve(tag,
                                 labels,
                                 predictions,
                                 global_step=self._resolve_step(global_step),
                                 num_thresholds=num_thresholds,
                                 weights=weights,
                                 walltime=walltime)

    def add_pr_curve_raw(self,
                         tag,
                         true_positive_counts,
                         false_positive_counts,
                         true_negative_counts,
                         false_negative_counts,
                         precision,
                         recall,
                         global_step=None,
                         num_thresholds=127,
                         weights=None,
                         walltime=None):
        """Forward to ``writer.add_pr_curve_raw`` when reporting is enabled."""
        if not self.report:
            return

        self.writer.add_pr_curve_raw(
            tag,
            true_positive_counts,
            false_positive_counts,
            true_negative_counts,
            false_negative_counts,
            precision,
            recall,
            global_step=self._resolve_step(global_step),
            num_thresholds=num_thresholds,
            weights=weights,
            walltime=walltime)

    def add_custom_scalars_multilinechart(self,
                                          tags,
                                          category='default',
                                          title='untitled'):
        """Forward to ``writer.add_custom_scalars_multilinechart`` when enabled."""
        if not self.report:
            return
        self.writer.add_custom_scalars_multilinechart(tags,
                                                      category=category,
                                                      title=title)

    def add_custom_scalars_marginchart(self,
                                       tags,
                                       category='default',
                                       title='untitled'):
        """Forward to ``writer.add_custom_scalars_marginchart`` when enabled."""
        if not self.report:
            return
        self.writer.add_custom_scalars_marginchart(tags,
                                                   category=category,
                                                   title=title)

    def add_custom_scalars(self, layout):
        """Forward to ``writer.add_custom_scalars`` when reporting is enabled."""
        if not self.report:
            return
        self.writer.add_custom_scalars(layout)
Exemplo n.º 9
0
    # Fragment of a tensorboard demo loop: `n_iter`, `writer`, `vutils`,
    # `sample_rate`, `freqs` and `resnet18` are defined outside the visible
    # lines.
    dummy_img = torch.rand(32, 3, 64, 64)  # output from network
    # Everything below is logged only on every 10th iteration.
    if n_iter % 10 == 0:
        # Combine the random batch into a single image and log it.
        x = vutils.make_grid(dummy_img, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)

        # Two seconds' worth of samples, initialised to silence.
        dummy_audio = torch.zeros(sample_rate * 2)
        # NOTE(review): the bound is x.size(0) -- the first dimension of the
        # image grid -- so only that many leading samples are written and the
        # rest of `dummy_audio` stays zero. Confirm whether
        # dummy_audio.size(0) was intended.
        for i in range(x.size(0)):
            # amplitude of sound should in [-1, 1]
            dummy_audio[i] = np.cos(freqs[n_iter // 10] * np.pi * float(i) / float(sample_rate))
        writer.add_audio('myAudio', dummy_audio, n_iter, sample_rate=sample_rate)

        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)

        # One histogram per named parameter; each parameter is cloned and
        # moved to CPU before being converted to a numpy array.
        for name, param in resnet18.named_parameters():
            writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter)

        # needs tensorboard 0.4RC or later
        # Random binary labels against random scores, 100 points each.
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(100), n_iter)

# Load the non-training MNIST split (downloaded into 'mnist' when missing)
# and keep its first 100 images and labels.
# NOTE(review): `datasets` is presumably torchvision.datasets -- the import
# is outside the visible lines.
dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]

# Flatten each image into a 784-element row for the embedding projector;
# unsqueeze(1) inserts a new axis at position 1 for the thumbnail images.
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")
writer.close()
Exemplo n.º 10
0
def _load_shard(path_data, shard_id):
    """Load the three per-shard tensors files (inputs, lengths, references).

    NOTE: ``torch.load`` unpickles its input; only point this at trusted files.
    """
    suffix = str(shard_id) + '.pickle'
    sentences_embs = torch.load(path_data + 'inputs_embeddings_' + suffix)
    X_lengths = torch.load(path_data + 'X_lengths_' + suffix)
    refs = torch.load(path_data + 'outputs_refs_' + suffix)
    return sentences_embs, X_lengths, refs


def _make_context_windows(sentence_vectors, ref, context_size):
    """Cut sentence vectors into overlapping windows with one target each.

    Window ``i`` covers rows ``i .. i + context_size - 1`` of
    ``sentence_vectors``; its target is ``ref[i + int(context_size / 2) - 1]``.
    Returns ``(windows, targets)`` where ``windows`` has the window axis
    second and ``targets`` has a leading axis of size 1.
    """
    n_windows = sentence_vectors.shape[0] - context_size + 1
    # (n, context, ...) -> (context, n, ...)
    windows = torch.stack([
        sentence_vectors[start:start + context_size]
        for start in range(n_windows)
    ]).transpose(0, 1)
    first_target = int(context_size / 2) - 1
    target_ids = torch.arange(first_target,
                              first_target + n_windows,
                              device=ref.device)
    # (n, ...) -> (1, n, ...)
    targets = torch.index_select(ref, 0, target_ids).unsqueeze(0)
    return windows, targets


def _predict_batch(model, sentences_emb, X_length, ref, device,
                   type_sentence_embedding):
    """Run one forward pass and return ``(prediction, ref)`` ready for the loss."""
    sentences_emb = sentences_emb.to(device)
    X_length = X_length.to(device)
    ref = ref.to(device)

    if type_sentence_embedding == 'lstm':
        # Reset the sentence-level hidden state so it carries no history
        # from the previous batch, then encode each sentence.
        model.hidden_sentences = model.init_hidden(
            batch_size=sentences_emb.shape[1])
        sentence_vectors = model.forward_sentence(sentences_emb, X_length)
        sentences_emb, ref = _make_context_windows(sentence_vectors,
                                                   ref.squeeze(0),
                                                   model.taille_context)

    model.hidden = model.init_hidden(batch_size=sentences_emb.shape[1])
    prediction = torch.squeeze(model(sentences_emb), 1)
    ref = torch.squeeze(ref, 0)
    return prediction, ref


def _equal_error_rate(y_ref, y_pred):
    """Return ``(fpr, tpr, eer_threshold, eer)`` from the ROC curve.

    The equal-error point is taken where ``|fnr - fpr|`` is smallest and the
    reported rate is the false-positive rate at that point.
    """
    fpr, tpr, thresholds = roc_curve(y_ref, y_pred, pos_label=1)
    fnr = 1 - tpr
    eer_idx = np.nanargmin(np.absolute(fnr - fpr))
    return fpr, tpr, thresholds[eer_idx], fpr[eer_idx]


def launch_train(config,
                 model,
                 path_model,
                 path_data_train,
                 path_data_dev,
                 nb_epoch=5,
                 device='cpu',
                 type_sentence_embedding='lstm',
                 restart_at_epoch=0):
    """Train ``model`` with BCE loss and Adam, evaluating on dev every epoch.

    Data is read from numbered shard files ``inputs_embeddings_<i>.pickle``,
    ``X_lengths_<i>.pickle`` and ``outputs_refs_<i>.pickle`` under each data
    prefix.  All path arguments are joined by plain string concatenation, so
    they must already end with a path separator.

    Args:
        config: mapping; only ``config['path_work']`` is read (prefix for the
            per-epoch ROC curve PDFs).
        model: network exposing ``init_hidden``, ``forward_sentence``,
            ``taille_context``, ``hidden`` and ``hidden_sentences``.
        path_model: prefix under which ``models/model_<epoch>.pth.tar`` and
            ``model_best_<epoch>.pth.tar`` are written.
        path_data_train: prefix of the training shard files.
        path_data_dev: prefix of the dev shard files.
        nb_epoch: exclusive upper bound of the epoch index.
        device: torch device the tensors are moved to.
        type_sentence_embedding: when ``'lstm'``, sentences are first encoded
            with ``model.forward_sentence`` and grouped into context windows.
        restart_at_epoch: first epoch index to run.

    Returns:
        List with the mean training loss of every epoch that was run.  Mean
        dev losses are collected internally but not returned.

    Changes relative to the previous implementation:
        * dev evaluation now iterates over the dev shards it just loaded
          (it used to re-iterate the last *training* shard);
        * PR curves receive the full label/prediction arrays instead of
          their means;
        * the best model is tracked across epochs on the mean dev loss, and
          ``model_best_<epoch>.pth.tar`` is written only when it improves;
        * dev evaluation runs under ``torch.no_grad()``.
    """
    # Reference used by the original author:
    # https://gist.github.com/Tushar-N/dfca335e370a2bc3bc79876e6270099e
    check_dev_epoch = 1

    writer = SummaryWriter(comment='1 couche')

    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model = model.train()

    # Launch training
    print('début train')
    losses = []
    losses_dev = []
    # Tracked across epochs so the final report names the best epoch overall.
    best_loss_dev = None
    id_best_loss_dev = None
    nb_files = len(glob.glob(path_data_train + 'inputs_embeddings_*.pickle'))
    nb_files_dev = len(glob.glob(path_data_dev + 'inputs_embeddings_*.pickle'))
    for epoch in range(restart_at_epoch, nb_epoch):
        print('epoch', epoch + 1, 'on', nb_epoch)
        losses_ = []
        Y_pred = []
        Y_ref = []
        for it_ in tqdm(range(nb_files)):
            sentences_embs, X_lengths, refs = _load_shard(
                path_data_train, it_)

            # Each file contains all the window-size tensors of one episode.
            for sentences_emb, X_length, ref in zip(sentences_embs, X_lengths,
                                                    refs):
                # TODO: some tensors are empty (e.g. the one at index 142).
                if sentences_emb.shape[0] == 0:
                    continue

                # PyTorch accumulates gradients: clear them before each step.
                optimizer.zero_grad()

                prediction, ref = _predict_batch(model, sentences_emb,
                                                 X_length, ref, device,
                                                 type_sentence_embedding)

                loss = criterion(prediction, ref)
                losses_.append(loss.item())
                loss.backward()
                optimizer.step()

                # Kept for the EER / PR-curve computation.
                Y_pred.append(np.asarray(prediction.detach().to('cpu')))
                Y_ref.append(np.asarray(ref.to('cpu')))

        Y_ref = np.concatenate(Y_ref, axis=None)
        Y_pred = np.concatenate(Y_pred, axis=None)
        # add_pr_curve needs one label and one score per sample.
        writer.add_pr_curve('score_train', Y_ref, Y_pred, epoch)
        mean_loss_train = np.mean(np.asarray(losses_))

        print('Mean loss per epoch train', mean_loss_train)
        losses.append(mean_loss_train)
        torch.save(model.state_dict(),
                   path_model + 'models/model_' + str(epoch) + '.pth.tar')

        if epoch % check_dev_epoch == 0:  # We evaluate on dev set
            for name, param in model.named_parameters():
                writer.add_histogram(name,
                                     param.clone().cpu().data.numpy(), epoch)

            # EER on the training predictions gathered during this epoch.
            _, _, eer_threshold_train, eer_train = _equal_error_rate(
                Y_ref, Y_pred)

            losses_dev_ = []
            Y_pred = []
            Y_ref = []
            first_dev_batch = True
            # TODO: the model is still in train mode here (model.eval() was
            # left as a TODO by the original author).
            with torch.no_grad():  # no backward pass is run on dev data
                for it_ in range(nb_files_dev):
                    sentences_embs, X_lengths, refs = _load_shard(
                        path_data_dev, it_)

                    # Each file contains all the window-size tensors of one
                    # episode.
                    for sentences_emb, X_length, ref in zip(
                            sentences_embs, X_lengths, refs):
                        # TODO: some tensors are empty (e.g. index 142).
                        if sentences_emb.shape[0] == 0:
                            print('tensor empty wtf')
                            continue

                        prediction, ref = _predict_batch(
                            model, sentences_emb, X_length, ref, device,
                            type_sentence_embedding)
                        # Debug aid: show the first dev batch of each pass.
                        if first_dev_batch:
                            print(prediction, ref)
                            first_dev_batch = False

                        loss = criterion(prediction, ref)
                        losses_dev_.append(loss.item())

                        Y_pred.append(np.asarray(prediction.to('cpu')))
                        Y_ref.append(np.asarray(ref.to('cpu')))

            # EER on the dev predictions.
            Y_ref = np.concatenate(Y_ref, axis=None)
            Y_pred = np.concatenate(Y_pred, axis=None)
            fpr, tpr, eer_threshold_dev, eer_dev = _equal_error_rate(
                Y_ref, Y_pred)

            plt.plot(fpr,
                     tpr,
                     lw=1,
                     alpha=0.3,
                     label='ROC fold (AUC = %0.2f)' % (auc(fpr, tpr)))
            plt.xlim([-0.05, 1.05])
            plt.ylim([-0.05, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('Receiver operating characteristic curve')
            plt.legend(loc="lower right")
            plt.savefig(config['path_work'] + 'roc_curve_' + str(epoch) +
                        '.pdf')
            plt.close()

            mean_loss_dev = np.mean(np.asarray(losses_dev_))
            # Keep a copy of the weights whenever the mean dev loss improves.
            if best_loss_dev is None or mean_loss_dev < best_loss_dev:
                torch.save(
                    model.state_dict(),
                    path_model + 'model_best_' + str(epoch) + '.pth.tar')
                best_loss_dev = mean_loss_dev
                id_best_loss_dev = epoch

            print('Mean loss on dev', mean_loss_dev)
            print('EER threshold, fpr (train/dev)', eer_threshold_train,
                  eer_train, eer_threshold_dev, eer_dev)
            print('id_best_loss_dev', id_best_loss_dev)
            losses_dev.append(mean_loss_dev)
            writer.add_scalars(
                'data/scalar_group', {
                    'loss_train': mean_loss_train,
                    'loss_dev': mean_loss_dev,
                    'score_train': eer_train,
                    'score_dev': eer_dev
                }, epoch)
            writer.add_pr_curve('score_dev', Y_ref, Y_pred, epoch)

    writer.close()
    # Per the original note, the caller is expected to fetch and copy the
    # matching model_best_<epoch> file.
    print('Best model on epoch', id_best_loss_dev)
    print('fin train')
    return losses
Exemplo n.º 11
0
class TensorboardXLogger(NumpySeabornPlotLogger):
    """Logger that uses tensorboardX to log to Tensorboard.

    Every ``show_*`` method takes an optional ``counter`` that is used as the
    global step of the logged item. When it is omitted, a per-item counter kept
    in :attr:`val_dict` is incremented and used instead.
    """

    def __init__(self, target_dir, *args, **kwargs):
        """
        Creates the log directory and opens a tensorboardX writer in it.

        Args:
            target_dir (str): Directory the writer logs to (created if missing)
            *args: Forwarded to the parent logger
            **kwargs: Forwarded to the parent logger
        """

        super(TensorboardXLogger, self).__init__(*args, **kwargs)

        os.makedirs(target_dir, exist_ok=True)

        self.writer = SummaryWriter(target_dir)
        # Last global step used per "<name>-<kind>" key; unseen keys start at 0.
        self.val_dict = defaultdict(int)

        # Close the writer when the interpreter exits.
        atexit.register(self.writer.close)

    def _resolve_step(self, key, counter=None):
        """
        Updates the step stored under ``key`` and returns it.

        Args:
            key (str): Key into :attr:`val_dict`
            counter (int): Step to store; if None the stored step is incremented by one

        Returns:
            int: The global step to log with
        """

        if counter is not None:
            self.val_dict[key] = counter
        else:
            self.val_dict[key] += 1
        return self.val_dict[key]

    def show_image(self, image, name="Image", counter=None, **kwargs):
        """
        Sends an image to tensorboard.

        Args:
            image (np.ndarray/torch.tensor): Image array/tensor which will be sent
            name (str): Identifier for the image
            counter (int): Global step value
        """

        step = self._resolve_step("{}-image".format(name), counter)
        self.writer.add_image(name, image, global_step=step)

    def show_images(self, images, name="Images", counter=None, **kwargs):
        """
        Sends multiple images to tensorboard.

        Args:
            images (np.ndarray/torch.tensor): Image array/tensor which will be sent (NxCxHxW)
            name (str): Identifier for the images
            counter (int): Global step value
        """

        step = self._resolve_step("{}-image".format(name), counter)
        self.writer.add_images(name, images, global_step=step)

    @convert_params
    def show_value(self,
                   value,
                   name="Value",
                   counter=None,
                   tag=None,
                   **kwargs):
        """
        Sends a scalar value to tensorboard.

        Args:
            value (numeric): Value to be sent
            name (str): Identifier for the value
            counter (int): Global step value
            tag (str): Identifier for the frame (values with the same tag will be shown in the same graph)
        """

        if tag is None:
            key = name + "-" + name
        else:
            key = tag + "-" + name

        # The "-image" suffix is kept as-is so existing val_dict keys stay valid.
        step = self._resolve_step("{}-image".format(key), counter)

        if tag is not None:
            self.writer.add_scalars(tag, {name: value}, global_step=step)
            # Reset the writer's scalar cache after every grouped write
            # (presumably to keep it from growing over a long run).
            self.writer.scalar_dict = {}
        else:
            self.writer.add_scalar(name, value, global_step=step)

    def show_text(self, text, name="Text", counter=None, **kwargs):
        """
        Sends text to tensorboard.

        Args:
            text (str): Text to be sent
            name (str): Identifier for the text
            counter (int): Global step value
        """

        step = self._resolve_step("{}-text".format(name), counter)
        self.writer.add_text(name, text, global_step=step)

    @convert_params
    def show_image_grid(self,
                        image_array,
                        name="Image-Grid",
                        counter=None,
                        nrow=8,
                        padding=2,
                        normalize=False,
                        range=None,
                        scale_each=False,
                        pad_value=0,
                        *args,
                        **kwargs):
        """
        Sends an array of images to tensorboard as a grid. Like :meth:`.show_image`, but generates
        image grid before.

        Args:
            image_array (np.ndarray/torch.tensor): Image array/tensor which will be sent as an image grid
            name (str): Identifier for the image grid
            counter (int): Global step value
            nrow (int): Items per row in grid
            padding (int): Padding between images in grid
            normalize (bool): Normalize images in grid
            range (tuple): Tuple (min, max), so images will be normalized to this range
            scale_each (bool): If True, each image will be normalized separately instead of using
                min and max of whole tensor
            pad_value (float): Fill padding with this value
        """

        image_args = dict(nrow=nrow,
                          padding=padding,
                          normalize=normalize,
                          range=range,
                          scale_each=scale_each,
                          pad_value=pad_value)

        step = self._resolve_step("{}-image".format(name), counter)

        grid = np_make_grid(image_array, **image_args)
        self.writer.add_image(tag=name, img_tensor=grid, global_step=step)

    @convert_params
    def show_barplot(self,
                     array,
                     name="barplot",
                     counter=None,
                     *args,
                     **kwargs):
        """
        Sends a barplot to tensorboard.

        Args:
            array (np.array/torch.tensor): array of shape NxM where N is the number of rows and M is the number of elements in the row.
            name (str): The name of the figure
            counter (int): Global step value to record

        """

        step = self._resolve_step("{}-figure".format(name), counter)

        figure = super().show_barplot(array, name, *args, **kwargs)
        self.writer.add_figure(tag=name, figure=figure, global_step=step)

    @convert_params
    def show_lineplot(self,
                      y_vals,
                      x_vals=None,
                      name="lineplot",
                      counter=None,
                      *args,
                      **kwargs):
        """
        Sends a lineplot to tensorboard.

        Args:
            y_vals (np.array/torch.tensor): Array of shape MxN , where M is the number of points and N is the number of different line
            x_vals (np.array/torch.tensor): Has to have the same shape as Y: MxN. For each point in Y it gives the corresponding X value (if
                not set the points are assumed to be equally distributed in the interval [0, 1])
            name (str): The name of the figure
            counter (int): Global step value to record

        """

        step = self._resolve_step("{}-figure".format(name), counter)

        figure = super().show_lineplot(y_vals, x_vals, name, *args, **kwargs)
        self.writer.add_figure(tag=name, figure=figure, global_step=step)

    @convert_params
    def show_scatterplot(self,
                         array,
                         name="scatterplot",
                         counter=None,
                         *args,
                         **kwargs):
        """
        Sends a scatterplot to tensorboard.

        Args:
            array (np.array/torch.tensor): An array with size N x dim, where each row i results in a 2D
                (if dim = 2) or 3D (if dim = 3) point.
            name (str): The name of the figure
            counter (int): Global step value to record

        """

        step = self._resolve_step("{}-figure".format(name), counter)

        figure = super().show_scatterplot(array, name, *args, **kwargs)
        self.writer.add_figure(tag=name, figure=figure, global_step=step)

    @convert_params
    def show_piechart(self,
                      array,
                      name="piechart",
                      counter=None,
                      *args,
                      **kwargs):
        """
        Sends a piechart to tensorboard.

        Args:
            array (np.array/torch.tensor): Array of positive integers. Each integer will be
                presented as a part of the pie (with the total as the sum of all integers)
            name (str): The name of the figure
            counter (int): Global step value to record

        """

        step = self._resolve_step("{}-figure".format(name), counter)

        figure = super().show_piechart(array, name, *args, **kwargs)
        self.writer.add_figure(tag=name, figure=figure, global_step=step)

    def show_embedding(self,
                       tensor,
                       labels=None,
                       name='default',
                       label_img=None,
                       counter=None,
                       *args,
                       **kwargs):
        """
        Displays an embedding of a tensor (for more details see tensorboardX)

        Args:
            tensor (torch.tensor/np.array): Tensor to be embedded and then displayed
            labels (list): List of labels, each element will be converted to string
            name (str): The name for the embedding
            label_img (torch.tensor): Images to be displayed at the embedding points
            counter (int):  Global step value to record

        """

        step = self._resolve_step("{}-embedding".format(name), counter)

        self.writer.add_embedding(mat=tensor,
                                  metadata=labels,
                                  label_img=label_img,
                                  tag=name,
                                  global_step=step)

    def show_histogram(self,
                       array,
                       name="Histogram",
                       counter=None,
                       *args,
                       **kwargs):
        """
        Plots a histogram in the tensorboard histogram plugin

        Args:
            array (torch.tensor/np.array): Values to build histogram
            name (str): Data identifier
            counter (int):  Global step value to record

        """

        step = self._resolve_step("{}-histogram".format(name), counter)

        self.writer.add_histogram(tag=name, values=array, global_step=step)

    def show_pr_curve(self,
                      tensor,
                      labels,
                      name="pr-curve",
                      counter=None,
                      *args,
                      **kwargs):
        """
        Displays a precision recall curve given a tensor with scores and the corresponding labels

        Args:
            tensor (torch.tensor/np.array): Tensor with scores (e.g class probabilities)
            labels (list): Labels of the samples to which the scores match
            name (str): The name of the plot
            counter (int): Global step value
        """

        step = self._resolve_step("{}-pr-curve".format(name), counter)

        self.writer.add_pr_curve(tag=name,
                                 labels=labels,
                                 predictions=tensor,
                                 global_step=step)

    def close(self):
        """Closes the underlying tensorboardX writer."""
        self.writer.close()
Exemplo n.º 12
0
# region Final report
# Pandas console formatting: two decimals, up to 999 columns, no wrapping of
# wide frames.
pd.set_option('display.precision', 2)
pd.set_option('display.max_columns', 999)
pd.set_option('display.expand_frame_repr', False)

# Every value collected in nodes_df is a sequence of arrays; join each one
# into a single column of the final frame.
nodes_df = pd.DataFrame(
    {column: np.concatenate(chunks) for column, chunks in nodes_df.items()})

# Average precision of the scores in "Results" against the "Targets" column.
experiment.average_precision = sklearn.metrics.average_precision_score(
    nodes_df['Targets'], nodes_df['Results'])
print('Average precision:', experiment.average_precision)

# Mirror the metric and the full PR curve to the logger when one is configured.
if logger is not None:
    logger.add_scalar(
        'metrics/val/avg_precision',
        experiment.average_precision,
        global_step=experiment.samples,
    )
    logger.add_pr_curve(
        'infection',
        labels=nodes_df['Targets'].values,
        predictions=nodes_df['Results'].values,
        global_step=experiment.samples,
    )

# Disabled matplotlib rendering of the same precision-recall curve.
# noinspection PyUnreachableCode
if False:
    import matplotlib.pyplot as plt

    precision, recall, _ = sklearn.metrics.precision_recall_curve(
        nodes_df['Targets'], nodes_df['Results'])
    plt.step(recall, precision, where='post', color='b', alpha=0.2)
    plt.fill_between(recall, precision, step='post', color='b', alpha=0.2)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title(f'Precision-Recall curve: AP={experiment.average_precision:.2f}')
Exemplo n.º 13
0
     img = batch['image'].to(device)
     mask = batch['mask'].to(device)
     with torch.no_grad():
         pred, loss = model(img, mask)
     pred = pred[5].data
     mse += F.mse_loss(pred, mask)
     pred = pred.requires_grad_(False)
     preds.append(pred)
     masks.append(mask)
     if not i < 100:
         break
 pred = torch.stack(preds, 0)
 mask = torch.stack(masks, 0)
 writer.add_pr_curve('PR_curve',
                     mask,
                     pred,
                     global_step=int(
                         model_name.split('epo_')[1].split('step')[0]))
 writer.add_scalar('MAE',
                   F.mse_loss(pred, mask),
                   global_step=int(
                       model_name.split('epo_')[1].split('step')[0]))
 prediction = pred.data.cpu().numpy().flatten()
 target = mask.data.round().cpu().numpy().flatten()
 # print(type(prediction))
 precision, recall, threshold = precision_recall_curve(
     target, prediction)
 f_score = (1 + beta_square) * precision * recall / (
     beta_square * precision + recall)
 writer.add_scalar("Max F_score",
                   np.max(f_score),
Exemplo n.º 14
0
def main():
    """Train the configured point-cloud classifier and log progress to TensorBoard.

    Everything is driven by the module-level ``args`` namespace: device
    selection, dataset locations, model and optimizer choice, and how often
    the network is tested and checkpointed. If a checkpoint exists at
    ``<projdir>/experiment/checkpoints/<exp_name>.pth`` training resumes from
    it, otherwise it starts from scratch.

    Raises:
        Exception: If ``args.model_name`` is neither ``'pointnet'`` nor
            ``'pointnet2'``.
        ValueError: If ``args.optimizer`` is neither ``'SGD'`` nor ``'ADAM'``.
    """
    # --- SELECT DEVICES ---
    # Select either gpu or cpu
    device = torch.device("cuda" if args.cuda else "cpu")
    # Select among available GPUs
    if args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(x) for x in args.gpudevice)

    # --- CREATE EXPERIMENTS DIRECTORY AND LOGGERS IN TENSORBOARD ---
    projdir = sys.path[0]
    # Path for saving and loading the network. The components are joined one
    # by one so the layout is the same on every operating system.
    saveloadpath = os.path.join(projdir, 'experiment', 'checkpoints', args.exp_name + '.pth')
    Path(os.path.dirname(saveloadpath)).mkdir(exist_ok=True, parents=True)
    tblogdir = os.path.join(projdir, 'experiment', 'tensorboardX', args.exp_name)
    Path(tblogdir).mkdir(exist_ok=True, parents=True)
    # Writer used for everything shown in TensorBoard; pending events are
    # flushed to disk every `flush_secs` seconds.
    tb_writer = SummaryWriter(logdir=tblogdir, flush_secs=3, write_to_disk=True)

    # From here on the writer is closed even when training aborts with an error.
    try:
        # --- INIT DATASETS AND DATALOADER (FOR SINGLE EPOCH) ---
        # Ideal for PointNet and pointLSTM - dataloader will return (B:batch, S:seq, C:features, N:points)
        dataTransformations = transforms.Compose([
            ToSeries(),
            DataAugmentation(),
            Resampling(maxPointsPerFrame=10),
            ToTensor()
        ])
        # Init nuScenes datasets
        nusc_train = NuScenes(version=args.nuscenes_train_dir, dataroot=args.nuscenes_dir, verbose=True)
        train_dataset = RadarClassDataset(nusc_train, categories=args.categories, sensors=args.sensors,
                                          transforms=dataTransformations, sequence_length=1)
        nusc_test = NuScenes(version=args.nuscenes_test_dir, dataroot=args.nuscenes_dir, verbose=True)
        test_dataset = RadarClassDataset(nusc_test, categories=args.categories, sensors=args.sensors,
                                         transforms=dataTransformations, sequence_length=1)
        # Init training data loader
        trainDataLoader = DataLoader(train_dataset, batch_size=args.batchsize, shuffle=True,
                                     num_workers=args.num_workers)

        # --- INIT NETWORK MODEL ---
        # Load selected network model and put it to right device
        if args.model_name == 'pointnet':
            classifier = PointNetCls(dim=args.pointCoordDim, num_class=len(args.categories),
                                     feature_transform=args.feature_transform)
        elif args.model_name == 'pointnet2':
            classifier = PointNet2ClsMsg(dim=args.pointCoordDim, num_class=len(args.categories))
        else:
            raise Exception('Argument "model_name" does not match existent networks')
        classifier = classifier.to(device)

        # --- INIT LOSS FUNCTION ---
        loss_fun = FocalLoss(gamma=args.focalLoss_gamma, num_classes=len(args.categories),
                             alpha=args.weight_cat).to(device)

        # --- LOAD NETWORK IF EXISTS ---
        if os.path.exists(saveloadpath):
            print('Using pretrained model found...')
            # map_location lets a checkpoint written on a GPU be resumed on
            # whatever device was selected above (including the CPU).
            checkpoint = torch.load(saveloadpath, map_location=device)
            # Continue with the epoch after the one stored in the checkpoint.
            start_epoch = checkpoint['epoch'] + 1
            iteration = checkpoint['iteration']
            best_test_acc = checkpoint['test_accuracy']
            classifier.load_state_dict(checkpoint['model_state_dict'])
            # NOTE(review): 'optimizer_state_dict' is written to the checkpoint
            # below but is not restored here, so the optimizer starts fresh.
        else:
            print('No existing model, starting training from scratch...')
            # Epochs and iterations are counted from 1 rather than 0.
            start_epoch = 1
            iteration = 1
            best_test_acc = 0

        # --- CREATE OPTIMIZER ---
        if args.optimizer == 'SGD':
            optimizer = torch.optim.SGD(
                classifier.parameters(),
                lr=args.lr,
                momentum=0.9)
        elif args.optimizer == 'ADAM':
            optimizer = torch.optim.Adam(
                classifier.parameters(),
                lr=args.lr,
                betas=(0.9, 0.999),
                eps=1e-08,
                weight_decay=args.decay_rate)
        else:
            # Fail with a clear message instead of an unbound `optimizer` below.
            raise ValueError('Argument "optimizer" must be either "SGD" or "ADAM"')
        # Halve (gamma=0.5) the learning rate every `step_size` epochs
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_epoch_half, gamma=0.5)

        # Log info
        printparams = 'Model parameters:' + json.dumps(vars(args), indent=4, sort_keys=True)
        print(printparams)
        tb_writer.add_text('hyper-parameters', printparams, iteration)
        tb_writer.add_text(
            'dataset',
            'dataset sample size: training: {}, test: {}'.format(len(train_dataset), len(test_dataset)),
            iteration)

        # --- START TRAINING ---
        for epoch in range(start_epoch, args.epoch + 1):
            print('Epoch %d/%s:' % (epoch, args.epoch))
            # Log the current learning rate
            tb_writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], iteration)

            for batch_id, data in tqdm(enumerate(trainDataLoader, 0), total=len(trainDataLoader), smoothing=0.9):
                points, target = data   # (B:batch x S:seq x C:features x N:points) , (B x S:seq)
                # Drop the sequence dimension (equal to 1), cast to float and move to device
                points = points.squeeze(dim=1).float().to(device)   # (B:batch x C:features x N:points)
                target = target.float().to(device)                  # (B)
                # Reset gradients
                optimizer.zero_grad()
                # Put the module in training mode
                classifier.train()
                # Forward propagation
                pred = classifier(points)
                loss = loss_fun(pred, target.long())
                if args.model_name == 'pointnet':
                    loss += feature_transform_regularizer(classifier.trans) * 0.001
                    if args.feature_transform:
                        loss += feature_transform_regularizer(classifier.trans_feat) * 0.001
                # Back propagate
                loss.backward()
                # Update weights
                optimizer.step()
                # Log the training loss once every 5 batches
                if not batch_id % 5:
                    tb_writer.add_scalar('train_loss/cross_entropy', loss.item(), iteration)
                iteration += 1

                # Print the confusion matrix of the current batch every 20 iterations
                if not iteration % 20:
                    confmatrix_train = metrics_confusion_matrix(target, pred)
                    print('\nTrain confusion matrix: \n', confmatrix_train)

            # --- TEST NETWORK ---
            if not epoch % int(args.test_every_X_epochs):
                # Perform predictions on the training data.
                train_targ, train_pred = test(classifier, train_dataset, device, num_workers=0, batch_size=512)
                # Perform predictions on the testing data.
                test_targ, test_pred = test(classifier, test_dataset, device, num_workers=0, batch_size=512)

                # Accuracy on both splits
                train_acc = metrics_accuracy(train_targ, train_pred)
                test_acc = metrics_accuracy(test_targ, test_pred)
                print('\r Training loss: {}'.format(loss.item()))
                print('Train Accuracy: {}\nTest Accuracy: {}'.format(train_acc, test_acc))
                tb_writer.add_scalars('metrics/accuracy', {'train': train_acc, 'test': test_acc}, iteration)

                # Calculate confusion matrix.
                confmatrix_test = metrics_confusion_matrix(test_targ, test_pred)
                print('Test confusion matrix: \n', confmatrix_test)
                # Absolute and normalized confusion matrix figures
                fig, ax = plot_confusion_matrix(confmatrix_test, args.categories, normalize=False,
                                                title='Test Confusion Matrix')
                fig_n, ax_n = plot_confusion_matrix(confmatrix_test, args.categories, normalize=True,
                                                    title='Test Confusion Matrix - Normalized')
                tb_writer.add_figure('test_confusion_matrix/abs', fig, global_step=iteration, close=True)
                tb_writer.add_figure('test_confusion_matrix/norm', fig_n, global_step=iteration, close=True)

                # Log one precision recall curve per class.
                for idx, clsname in enumerate(args.categories):
                    # Convert log_softmax to softmax (actual probability) and select the desired class.
                    test_pred_binary = torch.exp(test_pred[:, idx])
                    test_targ_binary = test_targ.eq(idx)
                    tb_writer.add_pr_curve(tag='pr_curves/' + clsname, labels=test_targ_binary,
                                           predictions=test_pred_binary, global_step=iteration)

                # Track the best test accuracy seen so far.
                # NOTE: the model is saved on a fixed cadence below, not only
                # when the best accuracy is surpassed.
                best_test_acc = max(best_test_acc, test_acc)

            # --- SAVE NETWORK ---
            if not epoch % int(args.save_every_X_epochs):
                print('Best Accuracy: %f' % best_test_acc)
                state = {
                    'epoch': epoch,
                    'iteration': iteration,
                    'test_accuracy': best_test_acc,
                    'model_state_dict': classifier.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }
                torch.save(state, saveloadpath)
                print('Model saved!!!')

            scheduler.step()
    finally:
        tb_writer.close()
Exemplo n.º 15
0
        loss = F.binary_cross_entropy(y_pred, labels)
        if i%50 == 0:
            print ("epoch ", epoch_id, " loss", i, loss.item())

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        for name, param in model.named_parameters():
            if debug_gradient:
                if param.requires_grad:
                    print (name, "\n", param.data, "\n", "grad", param.grad)

    writer.add_pr_curve("pr_curve, epoch_id:" + str(epoch_id), valid_y, model(valid_X))

    writer.add_scalars('loss', {'training': F.binary_cross_entropy(model(train_X), train_y),
                               'validation': F.binary_cross_entropy(model(valid_X), valid_y)}, epoch_id)

# Run the trained model once on test_X and score it against test_y.
print("Evaluating after training")
y_pred = model(test_X)
loss = F.binary_cross_entropy(y_pred, test_y)

# Detach both tensors from the graph and convert them to numpy for sklearn.
y_pred_numpy, test_y_numpy = (t.detach().numpy() for t in (y_pred, test_y))

# Predictions are binarised at 0.5; each metric is sample-averaged and all
# three are logged together at step 1.
writer.add_scalars(
    'precision/recall/f1',
    {
        metric_name: metric_fn(test_y_numpy, y_pred_numpy > 0.5, average='samples')
        for metric_name, metric_fn in (('precision', precision_score),
                                       ('recall', recall_score),
                                       ('f1_score', f1_score))
    },
    1)
Exemplo n.º 16
0
class Unet(nn.Module):
    def __init__(self,
                 nb_classes,
                 experiment,
                 device,
                 c_in=1,
                 nb_blocks=4,
                 nb_layers=2,
                 nb_channels=8):

        self.nb_classes = nb_classes
        self.nb_blocks = nb_blocks
        self.nb_layers = nb_layers
        self.c_in = c_in
        self.c_ker = nb_channels

        self.experiment = experiment
        self.device = device

        super(Unet, self).__init__()

        block = []
        # Downsampling
        for _ in range(self.nb_blocks):
            block += block_downsampling(self.nb_layers, self.c_in, self.c_ker)
            self.c_in = self.c_ker
            self.c_ker *= 2
        self.down = nn.Sequential(*block)

        bottom = []
        # In-between downsampling and upsampling
        for _ in range(self.nb_layers):
            bottom.append(
                nn.Conv2d(self.c_in, self.c_ker, (3, 3), stride=1, padding=1))
            bottom.append(nn.ReLU())
            self.c_in = self.c_ker
        self.bottom = nn.Sequential(*bottom)

        block = []
        # Upsampling
        for _ in range(self.nb_blocks):
            block += block_upsampling(self.nb_layers, self.c_in, self.c_ker)
            self.c_ker //= 2
            self.c_in = self.c_ker
        self.up = nn.Sequential(*block)

        # Last step
        self.lastConv = nn.Conv2d(self.c_in,
                                  self.nb_classes, (3, 3),
                                  stride=1,
                                  padding=1)

        # Resizing for targets
        self.avgPool = nn.AvgPool2d((2, 2))

        self.activation = nn.Sigmoid()

        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight)

    def forward(self, x):
        target_shape = x.shape
        skip_connections = []

        # Downsampling
        for mod in list(self.down.modules())[1:]:
            if isinstance(mod, nn.MaxPool2d):
                skip_connections.append(x)
            x = mod(x)

        # In-between downsampling and upsampling
        for mod in list(self.bottom.modules())[1:]:
            x = mod(x)

        # Upsampling
        for mod in list(self.up.modules())[1:]:
            x = mod(x)
            if isinstance(mod, nn.ConvTranspose2d):
                last = skip_connections.pop()
                if last.shape != x.shape:
                    x = F.pad(x, (0, last.shape[-1] - x.shape[-1],
                                  last.shape[-2] - x.shape[-2], 0),
                              mode='constant',
                              value=0)
                x = torch.cat((x, last), dim=1)

        # Last step
        x = self.lastConv(x)

        # Resizing for targets
        if x.shape != target_shape:
            x = self.avgPool(x)

        return x

    # Method used to train the model and evaluate it at each epoch
    def train_model(self, data_loader, nb_epoch, lr):
        """Train the network for ``nb_epoch`` epochs and checkpoint it regularly.

        Logs go to ``<experiment>/train/logs`` and model weights to
        ``<experiment>/train/models``. The weights are saved every
        ``nb_epoch // 5`` epochs, or every epoch when fewer than five epochs
        are requested.

        Args:
            data_loader: Indexable collection of loaders, one per phase;
                ``train_epoch`` reads index 0 for 'train' and 1 for 'val'.
            nb_epoch: Number of epochs to run.
            lr: Learning rate of the Adam optimizer.
        """
        self.train(True)

        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(self.parameters(), lr=lr)

        self.best_loss = np.inf

        # Initialize a writer to output logs for graphs in tensorboard
        log_dir = os.path.join(self.experiment, 'train', 'logs')
        self.writer = SummaryWriter(log_dir)

        self.save_path = os.path.join(self.experiment, 'train', 'models')
        # torch.save does not create missing directories.
        os.makedirs(self.save_path, exist_ok=True)
        print('Models will be saved to {}/{}'.format(os.path.dirname(__file__),
                                                     self.save_path))
        print('Start training...')

        # nb_epoch // 5 is 0 for fewer than five epochs, which would make the
        # modulo below divide by zero; save every epoch in that case.
        save_every = max(1, nb_epoch // 5)

        # Close the writer even if an epoch raises.
        try:
            for epoch in range(nb_epoch):
                self.current_epoch = epoch + 1
                print('Epoch n°{}'.format(self.current_epoch))
                self.train_epoch(data_loader, optimizer, criterion)

                # Save model every 1/5th of the total number of epochs
                if self.current_epoch % save_every == 0:
                    print('Model saved.')
                    torch.save(
                        self.state_dict(),
                        os.path.join(self.save_path,
                                     'model{}.pth'.format(self.current_epoch)))

            self.writer.export_scalars_to_json(
                os.path.join(log_dir, 'scalar_hist.json'))
        finally:
            self.writer.close()

    def train_epoch(self, data_loader, optimizer, criterion):
        """Run the 'train' and 'val' phases of the current epoch.

        For each phase, every batch of the matching loader is forwarded
        through the model. Loss, pixel-wise precision/recall/accuracy and the
        predicted centers are accumulated over the batches, then written to
        ``self.writer`` at step ``self.current_epoch``. After the 'val' phase
        the weights are saved as ``best_model.pth`` under ``self.save_path``
        whenever the validation loss is lower than ``self.best_loss``.

        Args:
            data_loader: Indexable pair of loaders; index 0 feeds 'train' and
                index 1 feeds 'val'. Each batch is a dict providing the keys
                'image', 'target', 'image_center' and 'target_center'.
            optimizer: Optimizer stepped once per training batch.
            criterion: Loss, called as ``criterion(output, target)``.

        Raises:
            ValueError: If the loader of a phase yields no batch.
        """
        threshold = 0.5  # Threshold for the activation function

        for p, phase in enumerate(['train', 'val']):
            # Loss and accuracy for the current epoch at each phase
            running_loss = 0.0
            running_accuracy = 0.0
            running_recall = 0.0
            running_precision = 0.0

            # Estimation of center of grapes
            centers_pred = np.array([])
            centers_data = np.array([])
            centers_true = np.array([])

            n_batches = 0
            # Only enable gradients for training. Used as a context manager so
            # the previous grad mode is restored afterwards instead of leaving
            # autograd globally disabled once the 'val' phase is over.
            with torch.set_grad_enabled(phase == 'train'):
                # Every batch is processed: a stray `break` used to stop the
                # loop after the first batch of each phase.
                for data in data_loader[p]:
                    n_batches += 1

                    # Read the data
                    inputs = data['image'].to(self.device)
                    target = data['target'].to(self.device)

                    # Forward path + loss computing
                    output = self(inputs)
                    loss = criterion(output, target)
                    running_loss += loss.item()

                    # Zero the parameter gradients and optimize the weights
                    if phase == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                    scores = self.activation(output)
                    pred = (scores > threshold).float()

                    # Compute TP, FP, FN and TN for precision, recall and
                    # accuracy.
                    # When 1 pixels are equal
                    tp = torch.sum((pred == 1) * (target == 1)).item()
                    # Don't reward detection of berries' visible pixels
                    tp -= torch.sum((pred == 1) * (inputs == 1)).item()
                    # When 1 pixels are on the background
                    fp = torch.sum((pred == 1) * (target == 0)).item()
                    # When 0 pixels are on the berry
                    fn = torch.sum((pred == 0) * (target == 1)).item()
                    # When 0 pixels are on the background
                    tn = torch.sum((pred == 0) * (target == 0)).item()
                    total = tp + tn + fp + fn
                    running_precision += \
                        tp / (tp + fp) if (tp + fp) != 0 else 0
                    running_recall += tp / (tp + fn) if (tp + fn) != 0 else 0
                    running_accuracy += (tp + tn) / total if total != 0 else 0

                    # Retrieve centers for estimation center error
                    centers_data = np.append(centers_data,
                                             data['image_center'].numpy())
                    centers_true = np.append(centers_true,
                                             data['target_center'].numpy())
                    for j in range(inputs.shape[0]):
                        cX, cY = find_center(
                            pred[j, 0].cpu().numpy().astype(np.uint8), 'pred')
                        centers_pred = np.append(centers_pred,
                                                 np.array([cX, cY]))

            if n_batches == 0:
                raise ValueError(
                    "The '{}' data loader yielded no batch.".format(phase))

            # Normalize metrics
            running_loss /= n_batches
            running_precision /= n_batches
            running_recall /= n_batches
            running_accuracy /= n_batches

            # Compute other metrics
            if (running_precision + running_recall) != 0:
                f_score = (2 * running_precision * running_recall) / \
                    (running_precision + running_recall)
            else:
                f_score = 0

            # Ranking metrics are computed on the last batch of the phase.
            # They need the continuous scores: thresholded predictions would
            # collapse the curve to a single operating point.
            y_true = target.view(-1).cpu().numpy()
            y_score = scores.view(-1).detach().cpu().numpy()
            pr_prec, pr_rec, _ = precision_recall_curve(y_true, y_score)
            auc_score = auc(pr_rec, pr_prec)
            ap_score = average_precision_score(y_true=y_true, y_score=y_score)

            # Compute estimation centers error with L1-norm
            centers_data = centers_data.reshape(-1, 2)
            centers_true = centers_true.reshape(-1, 2)
            centers_pred = centers_pred.reshape(-1, 2)
            l1_dist_baseline = np.mean(
                np.sum(np.abs(centers_true - centers_data), axis=1))
            l1_dist_pred = np.mean(
                np.sum(np.abs(centers_true - centers_pred), axis=1))

            # Matplotlib figure of the predictions stored on tensorboard
            fig = prediction_figure(pred=pred,
                                    inputs=inputs,
                                    target=target,
                                    phase=phase,
                                    epoch=self.current_epoch)

            # Write computed metrics in tensorboard
            self.writer.add_scalar('{}/accuracy'.format(phase),
                                   running_accuracy, self.current_epoch)
            self.writer.add_scalar('{}/auc'.format(phase), auc_score,
                                   self.current_epoch)
            self.writer.add_scalar('{}/aver_prec'.format(phase), ap_score,
                                   self.current_epoch)
            self.writer.add_scalar('{}/l1-error-baseline'.format(phase),
                                   l1_dist_baseline, self.current_epoch)
            self.writer.add_scalar('{}/l1-error-pred'.format(phase),
                                   l1_dist_pred, self.current_epoch)
            self.writer.add_scalar('{}/f-score'.format(phase), f_score,
                                   self.current_epoch)
            self.writer.add_scalar('{}/loss'.format(phase), running_loss,
                                   self.current_epoch)
            self.writer.add_scalar('{}/precision'.format(phase),
                                   running_precision, self.current_epoch)
            self.writer.add_scalar('{}/recall'.format(phase), running_recall,
                                   self.current_epoch)
            self.writer.add_pr_curve('{}/pr_curve'.format(phase), y_true,
                                     y_score, self.current_epoch)
            self.writer.add_figure('{}/prediction'.format(phase), fig,
                                   self.current_epoch)

            if phase == 'val':
                # Save the model with the best loss
                if running_loss < self.best_loss:
                    self.best_loss = running_loss
                    print('Save best model.')
                    torch.save(self.state_dict(),
                               os.path.join(self.save_path, 'best_model.pth'))

    def predict(self, test_loader, threshold):
        """Evaluate the model on a DataLoader, or complete a batch of images.

        The model is switched to eval mode and everything runs without
        gradients.

        * When ``test_loader`` is a ``DataLoader``, every batch is predicted,
          a classification report and a PR-curve plot are saved under
          ``<self.experiment>/eval`` and the L1 center errors are printed.
          Pixels that are both predicted as 1 and equal to 1 in the input are
          excluded from the report and the curve.
        * Otherwise ``test_loader`` is treated as a tensor of shape
          [BxCxHxW]; each image is rescaled to (225, 325), predicted, and
          rescaled back to the original spatial size.

        Args:
            test_loader: A ``torch.utils.data.DataLoader`` whose batches are
                dicts with the keys 'image', 'target', 'image_center' and
                'target_center', or a tensor of images.
            threshold: Cut-off applied to the activation output to obtain a
                binary prediction.

        Returns:
            In DataLoader mode, the thresholded prediction of the last batch;
            in tensor mode, a tensor shaped like ``test_loader`` holding the
            completed masks.
        """
        self.eval()

        # Predictions and labels for the confusion matrix
        y_true = np.array([])
        y_pred = np.array([])
        # Scores for the PR-Curve
        y_score = np.array([])
        # Estimation of center of grapes
        centers_pred = np.array([])
        centers_data = np.array([])
        centers_true = np.array([])
        with torch.no_grad():
            # Solely for evaluation purpose
            if isinstance(test_loader, torch.utils.data.dataloader.DataLoader):
                print('Start evaluation...')
                # Progress is reported about five times; the step is clamped
                # to 1 so loaders with fewer than 5 batches do not trigger a
                # modulo by zero.
                progress_step = max(1, len(test_loader) // 5)
                for i, data in enumerate(test_loader):
                    # Read the data
                    inputs = data['image'].to(self.device)

                    # Forward path
                    output = self(inputs)
                    scores = self.activation(output)
                    pred = (scores > threshold).float()
                    # The target must live on the same device as the mask
                    # below, otherwise boolean indexing fails on GPU.
                    target = data['target'].to(self.device)

                    # Drop the visible pixels that were predicted as 1
                    keep = ~((pred == 1) * (inputs == 1))
                    preds = pred[keep]
                    target = target[keep]
                    scores = scores[keep]

                    y_pred = np.append(y_pred, preds.view(-1).cpu().numpy())
                    y_true = np.append(y_true, target.view(-1).cpu().numpy())
                    y_score = np.append(y_score, scores.view(-1).cpu().numpy())

                    centers_data = np.append(centers_data,
                                             data['image_center'].numpy())
                    centers_true = np.append(centers_true,
                                             data['target_center'].numpy())
                    for j in range(inputs.shape[0]):
                        cX, cY = find_center(
                            pred[j, 0].cpu().numpy().astype(np.uint8), 'pred')
                        if cX != 0 and cY != 0:
                            centers_pred = np.append(centers_pred,
                                                     np.array([cX, cY]))
                        else:
                            # Fall back on the center of the input image
                            centers_pred = np.append(
                                centers_pred, data['image_center'][j].numpy())

                    if (i + 1) % progress_step == 0:
                        print('Done: {}/{}'.format(i + 1, len(test_loader)))
                print('Evaluation is finished. Metrics are being computed...')
                save_classification_report(
                    y_true, y_pred, threshold,
                    os.path.join(self.experiment, 'eval', 'class_rep.png'))
                centers_data = centers_data.reshape(-1, 2)
                centers_true = centers_true.reshape(-1, 2)
                centers_pred = centers_pred.reshape(-1, 2)
                l1_dist_baseline = np.abs(centers_true - centers_data)
                l1_dist_baseline = np.sum(l1_dist_baseline, axis=1)
                l1_dist_baseline = np.mean(l1_dist_baseline)
                l1_dist_pred = np.abs(centers_true - centers_pred)
                l1_dist_pred = np.sum(l1_dist_pred, axis=1)
                l1_dist_pred = np.mean(l1_dist_pred)
                print('Baseline center error: {}'.format(l1_dist_baseline))
                print('Center prediction error: {}'.format(l1_dist_pred))
                print('Classification report plot saved successfully.')
                save_pr_curve_plot(
                    y_true, y_score,
                    os.path.join(self.experiment, 'eval', 'pr_cruve.html'))
                print('PR Curve plot saved successfully.')
            else:
                print('Start amodal completion...')
                # When testing in real time, not with synthetic dataset;
                # must be a tensor of shape [BxCxHxW]
                pred = torch.empty_like(test_loader)
                orig_shape = test_loader.shape
                for b, img in enumerate(test_loader):
                    img = rescale(img, (225, 325))
                    output = self(img.to(self.device))
                    res = (self.activation(output) > threshold).float()
                    # Back to the spatial size of the original images
                    res = rescale(res.squeeze().cpu(), orig_shape[2:])
                    pred[b] = res.bool()

        return pred
Exemplo n.º 17
0
            "xcosx": n_iter * np.cos(n_iter),
            "arctanx": np.arctan(n_iter)
        }, n_iter)
    x = torch.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = vutils.make_grid(x, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)
        x = torch.zeros(sample_rate * 2)
        for i in range(x.size(0)):
            x[i] = np.cos(
                freqs[n_iter // 10] * np.pi * float(i) /
                float(sample_rate))  # sound amplitude should in [-1, 1]
        writer.add_audio('myAudio', x, n_iter, sample_rate=sample_rate)
        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        for name, param in vgg16.named_parameters():
            writer.add_histogram(name,
                                 param.clone().cpu().data.numpy(), n_iter)
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100),
                            np.random.rand(100),
                            n_iter)  #needs tensorboard 0.4RC or later
# Log an embedding built from MNIST: load the non-training split
# (train=False) into the 'mnist' folder, downloading it if needed.
dataset = datasets.MNIST('mnist', train=False, download=True)
# Keep the first 100 images (as floats) and their labels.
# NOTE(review): newer torchvision releases appear to expose these attributes
# as `data` / `targets` -- confirm against the installed version.
images = dataset.test_data[:100].float()
label = dataset.test_labels[:100]
# One row of 784 values per image is used as its embedding vector.
features = images.view(100, 784)
# Labels go in as metadata; the images (with an added dimension at index 1)
# are passed as `label_img`.
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))

# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")

writer.close()
Exemplo n.º 18
0
def run(args):
    """Train and validate the clear-cell grade classifier with TensorBoard logs.

    Builds the train/val datasets and loaders, creates a ``SummaryWriter`` in
    a time-stamped log directory, then for ``args.epochs`` epochs runs a
    training and a validation phase. Each loader item is one patient
    (``batch_size=1``): the loss is computed per slice, while the patient
    prediction is the majority vote of the slice predictions and the patient
    probability is the fraction of slices predicted as 1.

    Args:
        args: Parsed options. The attributes read here are ``model``,
            ``prefix_name``, ``epochs``, ``weight``, ``lr``, ``gamma``,
            ``lr_scheduler`` ("plateau" or "step"), ``gpu`` and
            ``log_every``.
    """
    print('Task 1: clear cell grade prediction')
    path = '/data/larson2/RCC_dl/new/clear_cell/'

    transform = {
        'train':
        transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240,
                                   zero_center=True),  # rset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #     src.dataloader.Normalize(),
            #     src.dataloader.Crop(110),
            #     src.dataloader.RandomCenterCrop(90),
            src.dataloader.RandomHorizontalFlip(),
            #     src.dataloader.RandomRotate(25),
            src.dataloader.Resize(256)
        ]),
        'val':
        transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240,
                                   zero_center=True),  # rset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #       src.dataloader.Normalize(),
            #       src.dataloader.Crop(90),
            src.dataloader.Resize(256)
        ])
    }

    my_dataset = {
        'train':
        src.dataloader.RCCDataset_h5(path,
                                     mode='train',
                                     transform=transform['train']),
        # Validation must use the deterministic pipeline: the train one
        # applies random flips.
        'val':
        src.dataloader.RCCDataset_h5(path,
                                     mode='val',
                                     transform=transform['val'])
    }

    my_loader = {
        x: DataLoader(my_dataset[x], batch_size=1, shuffle=True, num_workers=4)
        for x in ['train', 'val']
    }

    print('train size: ', len(my_loader['train']))
    print('val size: ', len(my_loader['val']))

    ### Some Checkers
    # Draw a single sample instead of spinning up the workers once per print
    sample = next(iter(my_loader['train']))[0]
    print('Summary: ')
    print('\ttrain size: ', len(my_loader['train']))
    print('\tval size: ', len(my_loader['val']))
    print('\tDatatype = ', sample.dtype)
    print('\tMin = ', sample.min())
    print('\tMax = ', sample.max())
    print('\tInput size', sample.shape)
    #     print('\tweight = ', args.weight)

    ### Tensorboard Log Setup
    log_root_folder = "/data/larson2/RCC_dl/logs/"
    now = datetime.now()
    now = now.strftime("%Y%m%d-%H%M%S")
    logdir = os.path.join(
        log_root_folder,
        f"{now}_model_{args.model}_{args.prefix_name}_epoch_{args.epochs}_weight_{args.weight}_lr_{args.lr}_gamma_{args.gamma}_lrsche_{args.lr_scheduler}_{now}"
    )
    #     os.makedirs(logdir)
    print(f'\tlogdir = {logdir}')

    writer = SummaryWriter(logdir)

    ### Model Selection

    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")

    model = src.model.TDNet()
    model = model.to(device)

    writer.add_graph(model, my_dataset['train'][0][0].to(device))

    print('\tCuda:', torch.cuda.is_available(), f'\n\tdevice = {device}')

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.1)

    # Stays None when no known scheduler is requested
    scheduler = None
    if args.lr_scheduler == "plateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               factor=.3,
                                                               threshold=1e-4,
                                                               verbose=True)
    elif args.lr_scheduler == "step":
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=3,
                                                    gamma=args.gamma)

    pos_weight = torch.FloatTensor([args.weight]).to(device)
    criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    ### Here we go
    for epoch in range(args.epochs):
        current_lr = get_lr(optimizer)

        epoch_loss = {'train': 0., 'val': 0.}
        epoch_corrects = {'train': 0., 'val': 0.}

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_losses = []
            running_corrects = 0.  # patients whose majority vote is right
            y_trues = []
            y_probs = []
            y_preds = []

            print('lr: ', current_lr)
            for i, (inputs, labels, _header) in enumerate(my_loader[phase]):
                optimizer.zero_grad()

                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())  # raw logits
                    probs = torch.sigmoid(
                        outputs)  # [0, 1] probability, shape = s * 1
                    preds = torch.round(
                        probs
                    )  # 0 or 1, shape = s * 1, prediction for each slice
                    pt_pred, _ = torch.mode(
                        preds, 0
                    )  # take majority vote, shape = 1, prediction for each patient

                    # fraction of slices voting for the positive class
                    count1 = (preds == 1).sum().float()
                    pt_prob = count1 / (preds.shape[0])

                    # convert label to slice level
                    loss = criterion(outputs, labels.repeat(
                        inputs.shape[1], 1))  # inputs shape = 1*s*3*256*256

                    # backward + optimize only if in training phases
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # multiple loss by slice num per batch?
                running_losses.append(loss.item())  # * inputs.size(0)
                # Count correct patients, not correct slices: the total is
                # later divided by a number of patients.
                running_corrects += float(pt_pred.item() == labels.item())

                y_trues.append(int(labels.item()))
                y_probs.append(pt_prob.item())  # use ratio to get probability
                y_preds.append(pt_pred.item())

                writer.add_scalar(f'{phase}/Loss', loss.item(),
                                  epoch * len(my_loader[phase]) + i)

                if i > 0 and i % args.log_every == 0:
                    print(
                        'Epoch: {0}/{1} | Single batch number : {2}/{3} | avg loss:{4} | Acc: {5:.4f} | lr: {6}'
                        .format(epoch + 1, args.epochs, i,
                                len(my_loader[phase]),
                                np.round(np.mean(running_losses), 4),
                                (running_corrects / (i + 1)), current_lr))

            # One PR curve per phase and epoch, tagged with the phase name
            writer.add_pr_curve(f'{phase}/pr_curve', np.array(y_trues),
                                np.array(y_probs), epoch)

            # epoch statistics
            epoch_loss[phase] = np.round(np.mean(running_losses), 4)
            epoch_corrects[phase] = (running_corrects / len(my_loader[phase]))

            cm = confusion_matrix(y_trues, y_preds, labels=[0, 1])
            src.helper.print_cm(cm, ['0', '1'])
            sens, spec, acc = src.helper.compute_stats(y_trues, y_preds)
            print('sens: {:.4f}'.format(sens))
            print('spec: {:.4f}'.format(spec))
            print('acc:  {:.4f}'.format(acc))
            print()

        # Schedulers are stepped once per epoch, after the optimizer updates;
        # the plateau scheduler monitors the validation loss.
        if args.lr_scheduler == "plateau":
            scheduler.step(epoch_loss['val'])
        elif args.lr_scheduler == "step":
            scheduler.step()

        print(
            '\\ Summary  train loss: {0} | val loss: {1} | train acc: {2:.4f} | val acc: {3:.4f}'
            .format(epoch_loss['train'], epoch_loss['val'],
                    epoch_corrects['train'], epoch_corrects['val']))
        print('-' * 30)

    # Flush pending events and release the log file
    writer.close()
def main(args):
    """Train a PointNet / PointNet++ radar classifier with TensorBoard logs.

    Selects the device, prepares the checkpoint and TensorBoard directories
    under ``sys.path[0]``, builds the train/test datasets, resumes from an
    existing checkpoint when one is found, then trains up to ``args.epoch``
    epochs. Every 10 epochs the model is evaluated on the train and test
    datasets, the accuracy, confusion matrices and per-class PR curves are
    logged, and a checkpoint is written.

    Args:
        args: Parsed options. The attributes read here are ``cuda``,
            ``gpudevice``, ``exp_name``, ``datapath``, ``batchsize``,
            ``num_workers``, ``model_name``, ``pointCoordDim``,
            ``numclasses``, ``feature_transform``, ``optimizer``, ``lr``,
            ``decay_rate``, ``lr_epoch_half``, ``epoch`` and
            ``train_metric``.

    Raises:
        Exception: If ``args.model_name`` or ``args.optimizer`` is not one
            of the supported values.
    """
    ''' --- SELECT DEVICES --- '''
    # Select either gpu or cpu
    device = torch.device("cuda" if args.cuda else "cpu")
    # Select among available GPUs
    if args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(x) for x in args.gpudevice)
    ''' --- CREATE EXPERIMENTS DIRECTORY AND LOGGERS IN TENSORBOARD --- '''
    projdir = sys.path[0]
    # Path for saving and loading the network. Each component is passed
    # separately so the path is valid on every OS, not only on Windows.
    saveloadpath = os.path.join(projdir, 'experiment', 'checkpoints',
                                args.exp_name + '.pth')
    Path(os.path.dirname(saveloadpath)).mkdir(exist_ok=True, parents=True)
    # timestamp = str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M'))
    tblogdir = os.path.join(projdir, 'experiment', 'tensorboardX',
                            args.exp_name)  # + '_' + timestamp )
    Path(tblogdir).mkdir(exist_ok=True, parents=True)
    # Writer used to send information to TensorBoard; flush_secs is the
    # number of seconds to wait between two flushes to disk.
    tb_writer = SummaryWriter(logdir=tblogdir,
                              flush_secs=3,
                              write_to_disk=True)
    ''' --- INIT DATASETS AND DATALOADER (FOR SINGLE EPOCH) --- '''
    # Read the data from file and split it into multi-frame training and
    # testing data. Ts is the recording period of a frame, i.e. the
    # automotive radar records one frame every 82 ms (needed for LSTM).
    train_dataset, test_dataset, class_names = read_dataset(
        args.datapath, Ts=0.082, train_test_split=0.8)

    # Both datasets hold multiple frames; every frame is resampled to a
    # unified number of detection points.

    # Init test dataset (data augmentation must NOT be used for testing)
    test_dataTransformations = transforms.Compose(
        [NormalizeTime(), Resampling(maxPointsPerFrame=10)])
    testDataset = RadarClassDataset(dataset=test_dataset,
                                    transforms=test_dataTransformations,
                                    sequence_length=1)
    # Init train datasets
    train_dataTransformations = transforms.Compose([
        NormalizeTime(),
        DataAugmentation(),
        Resampling(maxPointsPerFrame=10)
    ])
    trainDataset = RadarClassDataset(dataset=train_dataset,
                                     transforms=train_dataTransformations,
                                     sequence_length=1)
    # Create dataloader for training with batch_size frames in each batch
    trainDataLoader = DataLoader(trainDataset,
                                 batch_size=args.batchsize,
                                 shuffle=True,
                                 num_workers=args.num_workers)
    ''' --- INIT NETWORK MODEL --- '''
    # Load selected network model and put it to right device
    if args.model_name == 'pointnet':
        classifier = PointNetCls(dim=args.pointCoordDim,
                                 num_class=args.numclasses,
                                 feature_transform=args.feature_transform)
    elif args.model_name == 'pointnet2':
        classifier = PointNet2ClsMsg(
            dim=args.pointCoordDim,
            num_class=args.numclasses,
        )
    else:
        raise Exception(
            'Argument "model_name" does not match existent networks')
    classifier = classifier.to(device)
    ''' --- LOAD NETWORK IF EXISTS --- '''
    if os.path.exists(saveloadpath):
        print('Using pretrained model found...')
        # map_location lets a checkpoint saved on GPU be resumed on CPU
        checkpoint = torch.load(saveloadpath, map_location=device)
        # Resume at the epoch following the saved one (epochs count from 1)
        start_epoch = checkpoint['epoch'] + 1
        iteration = checkpoint['iteration']
        best_test_acc = checkpoint['test_accuracy']
        classifier.load_state_dict(checkpoint['model_state_dict'])
        # NOTE(review): 'optimizer_state_dict' is saved below but not
        # restored here -- confirm whether that is intended.
    else:
        print('No existing model, starting training from scratch...')
        # Epochs and iterations are counted from 1 rather than 0
        start_epoch = 1
        iteration = 1
        best_test_acc = 0
    ''' --- CREATE OPTIMIZER ---'''
    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.lr,
                                    momentum=0.9)
    elif args.optimizer == 'ADAM':
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=args.lr,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.decay_rate)
    else:
        # Fail here with a clear message instead of a NameError below
        raise Exception(
            'Argument "optimizer" does not match existent optimizers')
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=args.lr_epoch_half,
        gamma=0.5)  # half(0.5) the learning rate every 'step_size' epochs

    # log info
    printparams = 'Model parameters:' + json.dumps(
        vars(args), indent=4, sort_keys=True)
    print(printparams)
    tb_writer.add_text('hyper-parameters', printparams,
                       iteration)  # tb_writer.add_hparam(args)
    tb_writer.add_text(
        'dataset', 'dataset sample size: training: {}, test: {}'.format(
            train_dataset.shape[0], test_dataset.shape[0]), iteration)
    ''' --- START TRANING ---'''
    for epoch in range(start_epoch, args.epoch + 1):
        print('Epoch %d/%s:' % (epoch, args.epoch))

        # Log the current learning rate as a TensorBoard scalar
        tb_writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                             iteration)

        # Set the module in training mode once per epoch instead of once
        # per batch.
        classifier = classifier.train()

        for batch_id, data in tqdm(enumerate(trainDataLoader, 0),
                                   total=len(trainDataLoader),
                                   smoothing=0.9):
            points, target = data  # (B:batch x S:seq x C:features x N:points) , (B x S:seq)
            # Squeeze to drop the sequence dimension (equal to 1), convert
            # everything to float and move it to the device.
            points, target = points.squeeze(
                dim=1).float().to(device), target.float().to(
                    device)  # (B:batch x C:features x N:points) , (B)
            # Reset gradients
            optimizer.zero_grad()
            # Forward propagation
            pred = classifier(points)
            # The pointnet/pointnet2 models output log_softmax, and
            # "log_softmax -> nll_loss" == CrossEntropyLoss, so F.nll_loss
            # yields the cross entropy loss.
            loss = F.nll_loss(pred, target.long())
            if args.model_name == 'pointnet':
                loss += feature_transform_regularizer(classifier.trans) * 0.001
                if args.feature_transform:
                    loss += feature_transform_regularizer(
                        classifier.trans_feat) * 0.001
            # Back propagate
            loss.backward()
            # Update weights
            optimizer.step()
            # Log the training loss once every 5 batches
            if not batch_id % 5:
                tb_writer.add_scalar('train_loss/cross_entropy', loss.item(),
                                     iteration)
            iteration += 1
            # if batch_id> 2: break

        scheduler.step()
        ''' --- TEST AND SAVE NETWORK --- '''
        if not epoch % 10:  # Doing the following things every 10 epochs.
            # Perform predictions on the training data.
            train_targ, train_pred = test(classifier,
                                          trainDataset,
                                          device,
                                          num_workers=args.num_workers,
                                          batch_size=1800)
            # Perform predictions on the testing data.
            test_targ, test_pred = test(classifier,
                                        testDataset,
                                        device,
                                        num_workers=args.num_workers,
                                        batch_size=1800)

            # Calculate the accuracy rate for training data.
            train_acc = metrics_accuracy(train_targ, train_pred)
            # Calculate the accuracy rate for testing data.
            test_acc = metrics_accuracy(test_targ, test_pred)
            print('\r Training loss: {}'.format(loss.item()))
            print('Train Accuracy: {}\nTest Accuracy: {}'.format(
                train_acc, test_acc))
            # Log both accuracies as TensorBoard scalars.
            tb_writer.add_scalars('metrics/accuracy', {
                'train': train_acc,
                'test': test_acc
            }, iteration)

            # Calculate confusion matrix.
            confmatrix_test = metrics_confusion_matrix(test_targ, test_pred)
            print('Test confusion matrix: \n', confmatrix_test)
            # Log confusion matrix.
            fig, ax = plot_confusion_matrix(confmatrix_test,
                                            class_names,
                                            normalize=False,
                                            title='Test Confusion Matrix')
            # Log normalized confusion matrix.
            fig_n, ax_n = plot_confusion_matrix(
                confmatrix_test,
                class_names,
                normalize=True,
                title='Test Confusion Matrix - Normalized')
            # Send both confusion matrices to TensorBoard as figures.
            tb_writer.add_figure('test_confusion_matrix/abs',
                                 fig,
                                 global_step=iteration,
                                 close=True)
            tb_writer.add_figure('test_confusion_matrix/norm',
                                 fig_n,
                                 global_step=iteration,
                                 close=True)

            # Log precision recall curves.
            for idx, clsname in enumerate(class_names):
                # Convert log_softmax to softmax (an actual probability)
                # and select the desired class.
                test_pred_binary = torch.exp(test_pred[:, idx])
                test_targ_binary = test_targ.eq(idx)
                # Send the precision recall curve to TensorBoard.
                tb_writer.add_pr_curve(tag='pr_curves/' + clsname,
                                       labels=test_targ_binary,
                                       predictions=test_pred_binary,
                                       global_step=iteration)
            ''' --- SAVE NETWORK --- '''
            # if (test_acc >= best_test_acc): # For now lets save every time, since we are only testing in a subset of the test dataset
            best_test_acc = test_acc  # if test_acc > best_test_acc else best_test_acc
            state = {
                'epoch': epoch,
                'iteration': iteration,
                'train_accuracy': train_acc if args.train_metric else 0.0,
                'test_accuracy': best_test_acc,
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            torch.save(state, saveloadpath)
            print('Model saved!!!')

    print('Best Accuracy: %f' % best_test_acc)

    tb_writer.close()
Exemplo n.º 20
0
class Training(object):
    def __init__(self, config, logger=None):
        """Build the models, optimizer, LR scheduler, summary writer and EMA trackers.

        :param config: experiment configuration object; label mapping,
            optimizer hyper-parameters, scheduler choice and loss weights are
            read from it.
        :param logger: logger used for progress messages. When ``None``, the
            logger named ``'logger'`` is fetched, set to DEBUG, and
            ``logging.basicConfig`` is called with a message-only format.
        """
        if logger is None:
            logger = logging.getLogger('logger')
            logger.setLevel(logging.DEBUG)
            logging.basicConfig(format='%(message)s', level=logging.DEBUG)
        self.logger = logger

        self.config = config
        self.classes = list(config.id2label.keys())
        self.num_classes = config.num_classes

        # Model components; the auto-encoder only exists when its loss weight
        # is positive.
        self.embedder = Embedder(config)
        self.encoder = LSTMEncoder(config)
        self.clf = Classifier(config)
        self.clf_loss = SequenceCriteria(class_weight=None)
        with_autoencoder = config.lambda_ae > 0
        if with_autoencoder:
            self.ae = AEModel(config)

        self.writer = SummaryWriter(log_dir="TFBoardSummary")
        self.global_steps = 0

        # The optimizer is created from the parameters before any module is
        # moved to the GPU (same order as the checkpoint/cuda steps below).
        trainable = self._get_trainabe_modules()
        self.enc_clf_opt = Adam(trainable,
                                lr=config.lr,
                                betas=(config.beta1, config.beta2),
                                weight_decay=config.weight_decay,
                                eps=config.eps)

        # No scheduler attribute is created for any other scheduler name.
        scheduler_name = config.scheduler
        if scheduler_name == "ReduceLROnPlateau":
            self.scheduler = lr_scheduler.ReduceLROnPlateau(
                self.enc_clf_opt,
                mode='max',
                factor=config.lr_decay,
                patience=config.patience,
                verbose=True)
        elif scheduler_name == "ExponentialLR":
            self.scheduler = lr_scheduler.ExponentialLR(self.enc_clf_opt,
                                                        gamma=config.gamma)

        self._init_or_load_model()

        if config.multi_gpu:
            gpu_modules = [
                self.embedder, self.encoder, self.clf, self.clf_loss
            ]
            if with_autoencoder:
                gpu_modules.append(self.ae)
            for module in gpu_modules:
                module.cuda()

        def _start_ema(module):
            # One moving-average tracker per module, seeded with its state.
            tracker = ExponentialMovingAverage(decay=0.999)
            tracker.register(module.state_dict())
            return tracker

        self.ema_embedder = _start_ema(self.embedder)
        self.ema_encoder = _start_ema(self.encoder)
        self.ema_clf = _start_ema(self.clf)

        # Reference time handed to report_func during training.
        self.time_s = time()

    def _get_trainabe_modules(self):
        param_list = list(self.embedder.parameters()) + \
                     list(self.encoder.parameters()) + \
                     list(self.clf.parameters())
        if self.config.lambda_ae > 0:
            param_list += list(self.ae.parameters())
        return param_list

    def _get_l2_norm_loss(self):
        total_norm = 0.
        for p in self._get_trainabe_modules():
            param_norm = p.data.norm(p=2)
            total_norm += param_norm  # ** 2
        return total_norm  # / 2.

    def _init_or_load_model(self):
        """Start from a fresh training state.

        Makes sure the configured output directory exists and resets the epoch
        counter and the best accuracy seen so far. Restoring a checkpoint via
        ``_load_model`` is currently not attempted here.
        """
        ensure_directory(self.config.output_path)
        self.epoch, self.best_accuracy = 0, -np.inf

    def _compute_vocab_freq(self, train_, dev_):
        """Count how often each vocabulary index occurs in train and dev data.

        :param train_: iterable of 2-item entries whose second item is an
            iterable of vocabulary indices.
        :param dev_: same layout as ``train_``.
        :return: tensor of length ``config.n_vocab`` holding the counts, cast
            to ``batch_utils.FLOAT_TYPE``.
        """
        token_counts = collections.Counter()
        for split in (train_, dev_):
            for _, token_ids in split:
                token_counts.update(token_ids)

        freq_table = np.zeros(self.config.n_vocab)
        for token_id, count in token_counts.items():
            freq_table[token_id] = count
        return torch.from_numpy(freq_table).type(batch_utils.FLOAT_TYPE)

    def _save_model(self):
        state = {
            'epoch': self.epoch,
            'state_dict_encoder': self.ema_encoder.shadow_variable_dict,
            # self.encoder.state_dict(),
            'state_dict_embedder': self.ema_embedder.shadow_variable_dict,
            # self.embedder.state_dict(),
            'state_dict_clf': self.ema_clf.shadow_variable_dict,
            # self.clf.state_dict(),
            'best_accuracy': self.best_accuracy
        }
        torch.save(
            state, os.path.join(self.config.output_path,
                                self.config.model_file))

    def _load_model(self):
        checkpoint_path = os.path.join(self.config.output_path,
                                       self.config.model_file)
        if self.config.load_checkpoint and os.path.isfile(checkpoint_path):
            # Code taken from here: https://github.com/pytorch/examples/blob/master/imagenet/main.py
            dict_ = torch.load(checkpoint_path)
            self.epoch = dict_['epoch']
            self.best_accuracy = dict_['best_accuracy']
            #             self.embedder.load_state_dict(dict_['state_dict_embedder'])
            self.encoder.load_state_dict(dict_['state_dict_encoder'])
            self.clf.load_state_dict(dict_['state_dict_clf'])
            self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, self.epoch))
            return True

    def __call__(self,
                 train,
                 dev,
                 test,
                 unlabel,
                 addn,
                 addn_un,
                 addn_test,
                 ek,
                 ek_t,
                 ek_u,
                 graph_embs,
                 graph_embs_t,
                 graph_embs_u,
                 addn_test_fr=None,
                 addn_test_f=None,
                 addn_test_r=None,
                 mode="train",
                 checkPth=None):
        self.logger.info('Start training')
        if (mode == "train"):
            self._train(train, dev, unlabel, addn, addn_un, addn_test, ek,
                        ek_t, ek_u, graph_embs, graph_embs_t, graph_embs_u)
            if self.config.behaviour_test:
                self._evaluate(test, addn_test, ek_t, graph_embs_t,
                               addn_test_fr, addn_test_f, addn_test_r)
            else:
                self._evaluate(test, addn_test, ek_t, graph_embs_t)
        else:
            model = torch.load(checkPth)
            self.embedder.load_state_dict(model['state_dict_embedder'])
            self.encoder.load_state_dict(model['state_dict_encoder'])
            self.clf.load_state_dict(model['state_dict_clf'])
            if self.config.behaviour_test:
                self._evaluate(test, addn_test, ek_t, graph_embs_t,
                               addn_test_fr, addn_test_f, addn_test_r)
            else:
                self._evaluate(test, addn_test, ek_t, graph_embs_t)
            # self.encoder.eval()
            # self.clf.eval()
            # one, two = self.encoder(self.embedder(batch), batch)
            # pred = self.clf(one, addn_batch, ek_batch, graph_embs_batch, two)
            # accuracy = self.get_accuracy(cm, pred.data, batch.labels.data)

    def _create_iter(self, data_, wbatchsize, random_shuffler=None):
        """Create a batch iterator over ``data_`` using ``data.iterator.pool``.

        Examples are keyed by the length of their second item and batches are
        sized through ``batch_size_fn``.

        :param data_: sequence of examples; each example's second item must
            support ``len``.
        :param wbatchsize: batch size passed to the pooling iterator.
        :param random_shuffler: optional shuffling callable forwarded to the
            pooling iterator; ``None`` leaves the iterator's default in place.
        :return: the iterator produced by ``data.iterator.pool``.
        """
        # Forward the caller's shuffler; it used to be dropped in favour of a
        # hard-coded None, which made the parameter a no-op.
        return data.iterator.pool(data_,
                                  wbatchsize,
                                  key=lambda x: len(x[1]),
                                  batch_size_fn=batch_size_fn,
                                  random_shuffler=random_shuffler)

    def _run_epoch(self, train_data, dev_data, unlabel_data, addn_data,
                   addn_data_unlab, addn_dev, ek, ek_t, ek_u, graph_embs,
                   graph_embs_t, graph_embs_u):
        """Run one training pass over ``train_data`` and return the dev accuracy.

        For every training batch the classification loss is combined with the
        optional adversarial (AT), virtual adversarial (VAT), entropy and
        auto-encoder losses, each weighted by its ``config.lambda_*`` value,
        followed by one optimizer step and an update of the EMA trackers.
        Every ``config.eval_steps`` global steps the dev set is evaluated, the
        result is logged to the summary writer, and the model is saved when the
        dev accuracy improves.

        :param train_data: labelled examples; the second item of each example
            is treated as the token sequence.
        :param dev_data: labelled examples used for the periodic evaluation.
        :param unlabel_data: examples used for the unlabelled losses.
        :param addn_data, ek, graph_embs: per-example inputs for the training
            data, consumed in consecutive slices via ``retAddnBatch``
            (presumably aligned row-by-row with ``train_data`` -- confirm).
        :param addn_data_unlab, ek_u, graph_embs_u: same for the unlabelled
            data.
        :param addn_dev, ek_t, graph_embs_t: same for the dev data.
        :return: dev accuracy measured after the pass.
        """
        # NOTE(review): the return values are discarded. If these are torch
        # tensors, .cuda() is not in-place, so these calls have no lasting
        # effect here (the caller _train already moves them) -- confirm.
        addn_dev.cuda()
        ek_t.cuda()
        graph_embs_t.cuda()
        report_stats = utils.Statistics()
        cm = ConfusionMatrix(self.classes)
        _, seq_data = list(zip(*train_data))
        total_seq_words = len(list(itertools.chain.from_iterable(seq_data)))
        # Iteration estimate handed to report_func; the 1.5 factor is not
        # explained anywhere in this class.
        iter_per_epoch = (1.5 * total_seq_words) // self.config.wbatchsize

        self.encoder.train()
        self.clf.train()

        train_iter = self._create_iter(train_data, self.config.wbatchsize)

        unlabel_iter = self._create_iter(unlabel_data,
                                         self.config.wbatchsize_unlabel)

        # Running offsets into the per-example inputs of the training data
        # (sofar) and of the unlabelled data (sofar_1).
        sofar = 0
        sofar_1 = 0
        for batch_index, train_batch_raw in enumerate(train_iter):
            seq_iter = list(zip(*train_batch_raw))[1]
            seq_words = len(list(itertools.chain.from_iterable(seq_iter)))
            report_stats.n_words += seq_words
            self.global_steps += 1

            # self.enc_clf_opt.zero_grad()
            if self.config.add_noise:
                train_batch_raw = add_noise(train_batch_raw,
                                            self.config.noise_dropout,
                                            self.config.random_permutation)
            train_batch = batch_utils.seq_pad_concat(train_batch_raw, -1)

            train_embedded = self.embedder(train_batch)

            memory_bank_train, enc_final_train = self.encoder(
                train_embedded, train_batch)

            # An unlabelled batch is only prepared when one of the losses that
            # may use it is enabled.
            # NOTE(review): lambda_entropy is tested for truthiness here while
            # the other two weights are compared with "> 0".
            if self.config.lambda_vat > 0 or self.config.lambda_ae > 0 or self.config.lambda_entropy:
                try:
                    unlabel_batch_raw = next(unlabel_iter)
                except StopIteration:
                    # Unlabelled data exhausted: restart its iterator.
                    unlabel_iter = self._create_iter(
                        unlabel_data, self.config.wbatchsize_unlabel)
                    unlabel_batch_raw = next(unlabel_iter)

                if self.config.add_noise:
                    unlabel_batch_raw = add_noise(
                        unlabel_batch_raw, self.config.noise_dropout,
                        self.config.random_permutation)
                unlabel_batch = batch_utils.seq_pad_concat(
                    unlabel_batch_raw, -1)
                unlabel_embedded = self.embedder(unlabel_batch)
                memory_bank_unlabel, enc_final_unlabel = self.encoder(
                    unlabel_embedded, unlabel_batch)
                addn_batch_unlab = retAddnBatch(addn_data_unlab,
                                                memory_bank_unlabel.shape[0],
                                                sofar_1).cuda()
                ek_batch_unlab = retAddnBatch(ek_u,
                                              memory_bank_unlabel.shape[0],
                                              sofar_1).cuda()
                graph_embs_unlab = retAddnBatch(graph_embs_u,
                                                memory_bank_unlabel.shape[0],
                                                sofar_1).cuda()
                # Advance the offset and wrap around once all rows of ek_u
                # have been consumed.
                sofar_1 += addn_batch_unlab.shape[0]
                if sofar_1 >= ek_u.shape[0]:
                    sofar_1 = 0
            addn_batch = retAddnBatch(addn_data, memory_bank_train.shape[0],
                                      sofar).cuda()
            ek_batch = retAddnBatch(ek, memory_bank_train.shape[0],
                                    sofar).cuda()
            graph_embs_batch = retAddnBatch(graph_embs,
                                            memory_bank_train.shape[0],
                                            sofar).cuda()
            # Same wrap-around bookkeeping for the training-side inputs.
            sofar += addn_batch.shape[0]
            if sofar >= ek.shape[0]:
                sofar = 0
            pred = self.clf(memory_bank_train, addn_batch, ek_batch,
                            enc_final_train, graph_embs_batch)
            accuracy = self.get_accuracy(cm, pred.data,
                                         train_batch.labels.data)
            lclf = self.clf_loss(pred, train_batch.labels)

            # Each optional loss starts as a -1 placeholder and is only
            # replaced when its weight is positive. With a weight of 0 the
            # placeholder adds nothing to ltotal, but it is still accumulated
            # into report_stats below.
            lat = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            lvat = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_at > 0:
                lat = at_loss(
                    self.embedder,
                    self.encoder,
                    self.clf,
                    train_batch,
                    addn_batch,
                    ek_batch,
                    graph_embs_batch,
                    perturb_norm_length=self.config.perturb_norm_length)

            if self.config.lambda_vat > 0:
                lvat_train = vat_loss(
                    self.embedder,
                    self.encoder,
                    self.clf,
                    train_batch,
                    addn_batch,
                    ek_batch,
                    graph_embs_batch,
                    p_logit=pred,
                    perturb_norm_length=self.config.perturb_norm_length)
                if self.config.inc_unlabeled_loss:
                    if memory_bank_unlabel.shape[0] != ek_batch_unlab.shape[0]:
                        print(
                            f'Skipping; Unequal Shapes: {memory_bank_unlabel.shape} and {ek_batch_unlab.shape}'
                        )
                        # Abandons the whole iteration: no backward pass, no
                        # optimizer step, no reporting and no periodic
                        # evaluation for this batch.
                        continue
                    else:
                        lvat_unlabel = vat_loss(
                            self.embedder,
                            self.encoder,
                            self.clf,
                            unlabel_batch,
                            addn_batch_unlab,
                            ek_batch_unlab,
                            graph_embs_unlab,
                            p_logit=self.clf(memory_bank_unlabel,
                                             addn_batch_unlab, ek_batch_unlab,
                                             enc_final_unlabel,
                                             graph_embs_unlab),
                            perturb_norm_length=self.config.perturb_norm_length
                        )
                    # Any other unlabeled_loss_type leaves lvat at its -1
                    # placeholder.
                    if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                        lvat = 0.5 * (lvat_train + lvat_unlabel)
                    elif self.config.unlabeled_loss_type == "Unlabel":
                        lvat = lvat_unlabel
                else:
                    lvat = lvat_train

            lentropy = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_entropy > 0:
                lentropy_train = entropy_loss(pred)
                if self.config.inc_unlabeled_loss:
                    lentropy_unlabel = entropy_loss(
                        self.clf(memory_bank_unlabel, addn_batch_unlab,
                                 ek_batch_unlab, enc_final_unlabel,
                                 graph_embs_unlab))
                    # Any other unlabeled_loss_type leaves lentropy at its -1
                    # placeholder.
                    if self.config.unlabeled_loss_type == "AvgTrainUnlabel":
                        lentropy = 0.5 * (lentropy_train + lentropy_unlabel)
                    elif self.config.unlabeled_loss_type == "Unlabel":
                        lentropy = lentropy_unlabel
                else:
                    lentropy = lentropy_train

            lae = Variable(
                torch.FloatTensor([-1.]).type(batch_utils.FLOAT_TYPE))
            if self.config.lambda_ae > 0:
                lae = self.ae(memory_bank_unlabel, enc_final_unlabel,
                              unlabel_batch.sent_len, unlabel_batch_raw)

            # Weighted sum of all loss terms.
            ltotal = (self.config.lambda_clf * lclf) + \
                     (self.config.lambda_ae * lae) + \
                     (self.config.lambda_at * lat) + \
                     (self.config.lambda_vat * lvat) + \
                     (self.config.lambda_entropy * lentropy)

            report_stats.clf_loss += lclf.data.cpu().numpy()
            report_stats.at_loss += lat.data.cpu().numpy()
            report_stats.vat_loss += lvat.data.cpu().numpy()
            report_stats.ae_loss += lae.data.cpu().numpy()
            report_stats.entropy_loss += lentropy.data.cpu().numpy()
            report_stats.n_sent += len(pred)
            report_stats.n_correct += accuracy
            self.enc_clf_opt.zero_grad()
            ltotal.backward()

            params_list = self._get_trainabe_modules()
            # Excluding embedder from norm constraint when AT or VAT
            # NOTE(review): _get_trainabe_modules() already contains the
            # embedder parameters, so when normalize_embedding is false they
            # appear twice in params_list -- confirm this is intended.
            if not self.config.normalize_embedding:
                params_list += list(self.embedder.parameters())

            # NOTE(review): clip_grad_norm is the older spelling of
            # clip_grad_norm_ in PyTorch; check it against the installed
            # version.
            norm = torch.nn.utils.clip_grad_norm(params_list,
                                                 self.config.max_norm)
            report_stats.grad_norm += norm
            self.enc_clf_opt.step()
            # ExponentialLR is stepped once per batch.
            if self.config.scheduler == "ExponentialLR":
                self.scheduler.step()
            self.ema_embedder.apply(self.embedder.named_parameters())
            self.ema_encoder.apply(self.encoder.named_parameters())
            self.ema_clf.apply(self.clf.named_parameters())

            report_func(self.epoch, batch_index, iter_per_epoch, self.time_s,
                        report_stats, self.config.report_every, self.logger)

            # Periodic dev evaluation.
            if self.global_steps % self.config.eval_steps == 0:
                cm_, accuracy, prc_dev = self._run_evaluate(
                    dev_data, addn_dev, ek_t, graph_embs_t)
                self.logger.info(
                    "- dev accuracy {} | best dev accuracy {} ".format(
                        accuracy, self.best_accuracy))
                self.writer.add_scalar("Dev_Accuracy", accuracy,
                                       self.global_steps)
                # prc_dev holds one (scores, labels) pair per evaluated batch.
                pred_, lab_ = zip(*prc_dev)
                pred_ = torch.cat(pred_)
                lab_ = torch.cat(lab_)
                self.writer.add_pr_curve("Dev PR-Curve", lab_, pred_,
                                         self.global_steps)
                pprint.pprint(cm_)
                pprint.pprint(cm_.get_all_metrics())
                if accuracy > self.best_accuracy:
                    self.logger.info("- new best score!")
                    self.best_accuracy = accuracy
                    self._save_model()
                if self.config.scheduler == "ReduceLROnPlateau":
                    self.scheduler.step(accuracy)
                # Evaluation switched encoder and classifier to eval mode;
                # switch them back for the next training batch.
                self.encoder.train()
                #                 self.embedder.train()
                self.clf.train()

                if self.config.weight_decay > 0:
                    print(">> Square Norm: %1.4f " % self._get_l2_norm_loss())

        # End-of-epoch evaluation on the full train and dev data.
        cm, train_accuracy, _ = self._run_evaluate(train_data, addn_data, ek,
                                                   graph_embs)
        self.logger.info("- Train accuracy  {}".format(train_accuracy))
        pprint.pprint(cm.get_all_metrics())

        cm, dev_accuracy, _ = self._run_evaluate(dev_data, addn_dev, ek_t,
                                                 graph_embs_t)
        self.logger.info("- Dev accuracy  {} | best dev accuracy {}".format(
            dev_accuracy, self.best_accuracy))
        pprint.pprint(cm.get_all_metrics())
        self.writer.add_scalars("Overall_Accuracy", {
            "Train_Accuracy": train_accuracy,
            "Dev_Accuracy": dev_accuracy
        }, self.global_steps)
        return dev_accuracy

    @staticmethod
    def get_accuracy(cm, output, target):
        batch_size = output.size(0)
        predictions = output.max(-1)[1].type_as(target)
        correct = predictions.eq(target)
        correct = correct.float()
        if not hasattr(correct, 'sum'):
            correct = correct.cpu()
        correct = correct.sum()
        cm.add_batch(target.cpu().numpy(), predictions.cpu().numpy())
        return correct

    def _predict_batch(self, cm, batch, addn_batch, ek_batch,
                       graph_embs_batch):
        #         self.embedder.eval()
        self.encoder.eval()
        self.clf.eval()
        one, two = self.encoder(self.embedder(batch), batch)
        pred = self.clf(one, addn_batch, ek_batch, two, graph_embs_batch)
        torch.save(batch, 'co_attn_text.pth')
        torch.save(self.clf.co_attn_1.seq_len_weights,
                   'co_attn_weights_eval.pth')
        accuracy = self.get_accuracy(cm, pred.data, batch.labels.data)
        return pred, accuracy

    def chunks(self, l, n=15):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

    def _run_evaluate(self, test_data, addn_test, ek_t, graph_embs_t):
        """Evaluate the model on ``test_data`` in fixed-size chunks.

        The data is processed in order (no shuffling), so the per-example
        inputs ``addn_test``, ``ek_t`` and ``graph_embs_t`` are sliced with
        the same chunk boundaries as ``test_data``.

        A batch whose prediction raises is skipped with a logged warning; it
        contributes no correct predictions, while the accuracy is still
        divided by the full ``len(test_data)``.

        :return: ``(cm, accuracy, pr_curve_data)`` -- the confusion matrix,
            the accuracy in percent, and one ``(class-1 probability, labels)``
            pair per evaluated batch.
        """
        # Single source of truth for the chunk size used for the data and for
        # every per-example input.
        chunk_size = 15
        pr_curve_data = []
        cm = ConfusionMatrix(self.classes)
        accuracy_list = []
        # test_iter = self._create_iter(test_data, self.config.wbatchsize,
        #                               random_shuffler=utils.identity_fun)
        test_iter = self.chunks(test_data, chunk_size)

        for batch_index, test_batch in enumerate(test_iter):
            rows = slice(batch_index * chunk_size,
                         (batch_index + 1) * chunk_size)
            addn_batch = addn_test[rows]
            ek_batch = ek_t[rows]
            graph_embs_batch = graph_embs_t[rows]
            test_batch = batch_utils.seq_pad_concat(test_batch, -1)
            try:
                pred, acc = self._predict_batch(cm, test_batch, addn_batch,
                                                ek_batch, graph_embs_batch)
            except Exception:
                # Best-effort evaluation: keep going, but leave a trace
                # instead of hiding the failure. KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                self.logger.warning("Skipping evaluation batch %d",
                                    batch_index,
                                    exc_info=True)
                continue
            accuracy_list.append(acc)
            pr_curve_data.append(
                (F.softmax(pred, -1)[:, 1].data, test_batch.labels.data))
        accuracy = 100 * (sum(accuracy_list) / len(test_data))
        return cm, accuracy, pr_curve_data

    def _train(self, train_data, dev_data, unlabel_data, addn_data,
               addn_data_unlab, addn_dev, ek, ek_t, ek_u, graph_embs,
               graph_embs_t, graph_embs_u):
        addn_data = addn_data.cuda()
        addn_data_unlab = addn_data_unlab.cuda()
        addn_dev = addn_dev.cuda()
        ek = ek.cuda()
        ek_t = ek_t.cuda()
        ek_u = ek_u.cuda()
        graph_embs = graph_embs.cuda()
        graph_embs_t = graph_embs_t.cuda()
        graph_embs_u = graph_embs_u.cuda()
        # for early stopping
        nepoch_no_imprv = 0

        epoch_start = self.epoch + 1
        epoch_end = self.epoch + self.config.nepochs + 1
        for self.epoch in range(epoch_start, epoch_end):
            self.logger.info("Epoch {:} out of {:}".format(
                self.epoch, self.config.nepochs))
            #             random.shuffle(train_data)
            #             random.shuffle(unlabel_data)
            accuracy = self._run_epoch(train_data, dev_data, unlabel_data,
                                       addn_data, addn_data_unlab, addn_dev,
                                       ek, ek_t, ek_u, graph_embs,
                                       graph_embs_t, graph_embs_u)

            # early stopping and saving best parameters
            if accuracy > self.best_accuracy:
                nepoch_no_imprv = 0
                self.best_accuracy = accuracy
                self.logger.info("- new best score!")
                self._save_model()
            else:
                nepoch_no_imprv += 1
                if nepoch_no_imprv >= self.config.nepoch_no_imprv:
                    self.logger.info(
                        "- early stopping {} epochs without improvement".
                        format(nepoch_no_imprv))
                    break
            if self.config.scheduler == "ReduceLROnPlateau":
                self.scheduler.step(accuracy)

    def _evaluate(self,
                  test_data,
                  addn_test,
                  ek_t,
                  graph_embs_t,
                  addn_test_fr=None,
                  addn_test_f=None,
                  addn_test_r=None,
                  mode="train"):
        """Evaluate on the test set, dump predictions and report accuracy.

        When ``mode`` is ``"train"`` the checkpoint is first reloaded through
        ``_load_model``. The per-example class-1 probabilities and gold labels
        are written as tab-separated lines to
        ``<config.output_path>/<config.exp_name>_pred_gt.tsv`` and a confusion
        matrix of the rounded probabilities is printed. With
        ``config.behaviour_test`` set, the evaluation is repeated with each of
        the three alternative additional inputs and their accuracies are
        logged as well.

        :param addn_test_fr, addn_test_f, addn_test_r: alternative additional
            inputs; required (not ``None``) when ``config.behaviour_test`` is
            set.
        :param mode: ``"train"`` triggers the checkpoint reload.
        """
        addn_test, ek_t, graph_embs_t = (addn_test.cuda(), ek_t.cuda(),
                                         graph_embs_t.cuda())
        self.logger.info("Evaluating model over test set")
        if mode == "train":
            self._load_model()

        accuracy, prc_test = self._run_evaluate(test_data, addn_test, ek_t,
                                                graph_embs_t)[1:]

        run_behaviour = self.config.behaviour_test
        if run_behaviour:
            # Move all three variants first, then evaluate them in order.
            variants = [
                addn_test_fr.cuda(),
                addn_test_f.cuda(),
                addn_test_r.cuda()
            ]
            accuracy_fr, accuracy_f, accuracy_r = [
                self._run_evaluate(test_data, variant, ek_t, graph_embs_t)[1]
                for variant in variants
            ]

        # prc_test holds one (scores, labels) pair per evaluated batch.
        scores, labels = zip(*prc_test)
        scores = torch.cat(scores).cpu().tolist()
        labels = torch.cat(labels).cpu().tolist()

        dump_path = os.path.join(self.config.output_path,
                                 "{}_pred_gt.tsv".format(self.config.exp_name))
        with open(dump_path, 'w') as fp:
            fp.writelines(
                str(score) + '\t' + str(label) + '\n'
                for score, label in zip(scores, labels))

        self.logger.info("- test accuracy {}".format(accuracy))
        hard_predictions = [round(score) for score in scores]
        print('Normal Test Set: ', confusion_matrix(labels, hard_predictions))

        if run_behaviour:
            behaviour_report = (
                ("- behaviour test accuracy - fr {}", accuracy_fr),
                ("- behaviour test accuracy - f {}", accuracy_f),
                ("- behaviour test accuracy - r {}", accuracy_r),
            )
            for template, value in behaviour_report:
                self.logger.info(template.format(value))
Exemplo n.º 21
0
def main():
    # Args
    parser = argparse.ArgumentParser()
    parser.add_argument('--net',
                        type=str,
                        help='Net model class',
                        required=True)
    parser.add_argument('--traindb',
                        type=str,
                        help='Training datasets',
                        nargs='+',
                        choices=split.available_datasets,
                        required=True)
    parser.add_argument('--valdb',
                        type=str,
                        help='Validation datasets',
                        nargs='+',
                        choices=split.available_datasets,
                        required=True)
    parser.add_argument('--face',
                        type=str,
                        help='Face crop or scale',
                        required=True,
                        choices=['scale', 'tight'])
    parser.add_argument('--size',
                        type=int,
                        help='Train patch size',
                        required=True)

    parser.add_argument('--batch',
                        type=int,
                        help='Batch size to fit in GPU memory',
                        default=32)
    parser.add_argument('--lr', type=float, default=1e-5, help='Learning rate')
    parser.add_argument('--valint',
                        type=int,
                        help='Validation interval (iterations)',
                        default=500)
    parser.add_argument(
        '--patience',
        type=int,
        help='Patience before dropping the LR [validation intervals]',
        default=10)
    parser.add_argument('--maxiter',
                        type=int,
                        help='Maximum number of iterations',
                        default=20000)
    parser.add_argument('--init', type=str, help='Weight initialization file')
    parser.add_argument('--scratch',
                        action='store_true',
                        help='Train from scratch')

    parser.add_argument('--trainsamples',
                        type=int,
                        help='Limit the number of train samples per epoch',
                        default=-1)
    parser.add_argument(
        '--valsamples',
        type=int,
        help='Limit the number of validation samples per epoch',
        default=6000)

    parser.add_argument('--logint',
                        type=int,
                        help='Training log interval (iterations)',
                        default=100)
    parser.add_argument('--workers',
                        type=int,
                        help='Num workers for data loaders',
                        default=6)
    parser.add_argument('--device', type=int, help='GPU device id', default=0)
    parser.add_argument('--seed', type=int, help='Random seed', default=0)

    parser.add_argument('--debug', action='store_true', help='Activate debug')
    parser.add_argument('--suffix', type=str, help='Suffix to default tag')

    parser.add_argument('--attention',
                        action='store_true',
                        help='Enable Tensorboard log of attention masks')
    parser.add_argument('--log_dir',
                        type=str,
                        help='Directory for saving the training logs',
                        default='runs/binclass/')
    parser.add_argument('--models_dir',
                        type=str,
                        help='Directory for saving the models weights',
                        default='weights/binclass/')

    args = parser.parse_args()

    # Parse arguments
    net_class = getattr(fornet, args.net)
    train_datasets = args.traindb
    val_datasets = args.valdb
    face_policy = args.face
    face_size = args.size

    batch_size = args.batch
    initial_lr = args.lr
    validation_interval = args.valint
    patience = args.patience
    max_num_iterations = args.maxiter
    initial_model = args.init
    train_from_scratch = args.scratch

    max_train_samples = args.trainsamples
    max_val_samples = args.valsamples

    log_interval = args.logint
    num_workers = args.workers
    device = torch.device('cuda:{:d}'.format(
        args.device)) if torch.cuda.is_available() else torch.device('cpu')
    seed = args.seed

    debug = args.debug
    suffix = args.suffix

    enable_attention = args.attention

    weights_folder = args.models_dir
    logs_folder = args.log_dir

    # Random initialization
    np.random.seed(seed)
    torch.random.manual_seed(seed)

    # Load net
    net: nn.Module = net_class().to(device)

    # Loss and optimizers
    criterion = nn.BCEWithLogitsLoss()

    min_lr = initial_lr * 1e-5
    optimizer = optim.Adam(net.get_trainable_parameters(), lr=initial_lr)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        mode='min',
        factor=0.1,
        patience=patience,
        cooldown=2 * patience,
        min_lr=min_lr,
    )

    tag = utils.make_train_tag(
        net_class=net_class,
        traindb=train_datasets,
        face_policy=face_policy,
        patch_size=face_size,
        seed=seed,
        suffix=suffix,
        debug=debug,
    )

    # Model checkpoint paths
    bestval_path = os.path.join(weights_folder, tag, 'bestval.pth')
    last_path = os.path.join(weights_folder, tag, 'last.pth')
    periodic_path = os.path.join(weights_folder, tag, 'it{:06d}.pth')

    os.makedirs(os.path.join(weights_folder, tag), exist_ok=True)

    # Load model
    val_loss = min_val_loss = 10
    epoch = iteration = 0
    net_state = None
    opt_state = None
    if initial_model is not None:
        # If given load initial model
        print('Loading model form: {}'.format(initial_model))
        state = torch.load(initial_model, map_location='cpu')
        net_state = state['net']
    elif not train_from_scratch and os.path.exists(last_path):
        print('Loading model form: {}'.format(last_path))
        state = torch.load(last_path, map_location='cpu')
        net_state = state['net']
        opt_state = state['opt']
        iteration = state['iteration'] + 1
        epoch = state['epoch']
    if not train_from_scratch and os.path.exists(bestval_path):
        state = torch.load(bestval_path, map_location='cpu')
        min_val_loss = state['val_loss']
    if net_state is not None:
        incomp_keys = net.load_state_dict(net_state, strict=False)
        print(incomp_keys)
    if opt_state is not None:
        for param_group in opt_state['param_groups']:
            param_group['lr'] = initial_lr
        optimizer.load_state_dict(opt_state)

    # Initialize Tensorboard
    logdir = os.path.join(logs_folder, tag)
    if iteration == 0:
        # If training from scratch or initialization remove history if exists
        shutil.rmtree(logdir, ignore_errors=True)

    # TensorboardX instance
    tb = SummaryWriter(logdir=logdir)
    if iteration == 0:
        dummy = torch.randn((1, 3, face_size, face_size), device=device)
        dummy = dummy.to(device)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            tb.add_graph(net, [
                dummy,
            ], verbose=False)

    transformer = utils.get_transformer(face_policy=face_policy,
                                        patch_size=face_size,
                                        net_normalizer=net.get_normalizer(),
                                        train=True)

    # Datasets and data loaders
    print('Loading data')
    splits = split.make_splits(dbs={
        'train': train_datasets,
        'val': val_datasets
    })
    train_dfs = [splits['train'][db][0] for db in splits['train']]
    train_roots = [splits['train'][db][1] for db in splits['train']]
    val_roots = [splits['val'][db][1] for db in splits['val']]
    val_dfs = [splits['val'][db][0] for db in splits['val']]

    train_dataset = FrameFaceIterableDataset(
        roots=train_roots,
        dfs=train_dfs,
        scale=face_policy,
        num_samples=max_train_samples,
        transformer=transformer,
        size=face_size,
    )

    val_dataset = FrameFaceIterableDataset(
        roots=val_roots,
        dfs=val_dfs,
        scale=face_policy,
        num_samples=max_val_samples,
        transformer=transformer,
        size=face_size,
    )

    train_loader = DataLoader(
        train_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
    )

    val_loader = DataLoader(
        val_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
    )

    print('Training samples: {}'.format(len(train_dataset)))
    print('Validation samples: {}'.format(len(val_dataset)))

    if len(train_dataset) == 0:
        print('No training samples. Halt.')
        return

    if len(val_dataset) == 0:
        print('No validation samples. Halt.')
        return

    stop = False
    while not stop:

        # Training
        optimizer.zero_grad()

        train_loss = train_num = 0
        train_pred_list = []
        train_labels_list = []
        for train_batch in tqdm(train_loader,
                                desc='Epoch {:03d}'.format(epoch),
                                leave=False,
                                total=len(train_loader) //
                                train_loader.batch_size):
            net.train()
            batch_data, batch_labels = train_batch

            train_batch_num = len(batch_labels)
            train_num += train_batch_num
            train_labels_list.append(batch_labels.numpy().flatten())

            train_batch_loss, train_batch_pred = batch_forward(
                net, device, criterion, batch_data, batch_labels)
            train_pred_list.append(train_batch_pred.flatten())

            if torch.isnan(train_batch_loss):
                raise ValueError('NaN loss')

            train_loss += train_batch_loss.item() * train_batch_num

            # Optimization
            train_batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Logging
            if iteration > 0 and (iteration % log_interval == 0):
                train_loss /= train_num
                tb.add_scalar('train/loss', train_loss, iteration)
                tb.add_scalar('lr', optimizer.param_groups[0]['lr'], iteration)
                tb.add_scalar('epoch', epoch, iteration)

                # Checkpoint
                save_model(net, optimizer, train_loss, val_loss, iteration,
                           batch_size, epoch, last_path)
                train_loss = train_num = 0

            # Validation
            if iteration > 0 and (iteration % validation_interval == 0):

                # Model checkpoint
                save_model(net, optimizer, train_loss, val_loss, iteration,
                           batch_size, epoch, periodic_path.format(iteration))

                # Train cumulative stats
                train_labels = np.concatenate(train_labels_list)
                train_pred = np.concatenate(train_pred_list)
                train_labels_list = []
                train_pred_list = []

                train_roc_auc = roc_auc_score(train_labels, train_pred)
                tb.add_scalar('train/roc_auc', train_roc_auc, iteration)
                tb.add_pr_curve('train/pr', train_labels, train_pred,
                                iteration)

                # Validation
                val_loss = validation_routine(net, device, val_loader,
                                              criterion, tb, iteration, 'val')
                tb.flush()

                # LR Scheduler
                lr_scheduler.step(val_loss)

                # Model checkpoint
                if val_loss < min_val_loss:
                    min_val_loss = val_loss
                    save_model(net, optimizer, train_loss, val_loss, iteration,
                               batch_size, epoch, bestval_path)

                # Attention
                if enable_attention and hasattr(net, 'get_attention'):
                    net.eval()
                    # Show the attention for one real and one fake frame, both taken
                    # from the first training dataframe
                    for df, root, sample_idx, tag in [
                        (train_dfs[0], train_roots[0],
                         train_dfs[0][train_dfs[0]['label'] == False].index[0],
                         'train/att/real'),
                        (train_dfs[0], train_roots[0],
                         train_dfs[0][train_dfs[0]['label'] == True].index[0],
                         'train/att/fake'),
                    ]:
                        record = df.loc[sample_idx]
                        tb_attention(tb, tag, iteration, net, device,
                                     face_size, face_policy, transformer, root,
                                     record)

                if optimizer.param_groups[0]['lr'] == min_lr:
                    print('Reached minimum learning rate. Stopping.')
                    stop = True
                    break

            iteration += 1

            if iteration > max_num_iterations:
                print('Maximum number of iterations reached')
                stop = True
                break

            # End of iteration

        epoch += 1

    # Needed to flush out last events
    tb.close()

    print('Completed')
                                             columns=['false', 'true'])
                        sns.heatmap(cm_df, annot=True, fmt="d")
                        plt.title('Accuracy:{0:.3f}'.format(acc))
                        plt.ylabel('True label')
                        plt.xlabel('Predicted label')
                        print(cm)
                        train_writer.add_scalar("pr_auc", pr_auc,
                                                (epoch * num_steps) + step)
                        train_writer.add_scalar("roc_auc", roc_auc,
                                                (epoch * num_steps) + step)
                        train_writer.add_figure("roc_curve", fig,
                                                (epoch * num_steps) + step)
                        train_writer.add_figure("cm", fig_cm,
                                                (epoch * num_steps) + step)
                        train_writer.add_pr_curve("pr_curve",
                                                  labels[start_ind:end_ind],
                                                  predict_train,
                                                  (epoch * num_steps) + step)

                if ((epoch * num_steps) +
                        step) % dev_print_gap == dev_print_gap - 1:
                    l2s_test, labels_test = generate_data(
                        dataset=dataset,
                        data_index=dev_index,
                        flow_size=flow_size,
                        negetive_samples=negetive_samples_test)
                    test_time_before = time.time()
                    tp = 0
                    fp = 0
                    loss_sum = 0
                    num_steps_test = (len(l2s_test) // batch_size) - 1
                    Y_est = np.zeros((batch_size * (num_steps_test + 1), 1),