Example #1
def train(args,
          train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()
    dices_1s = AverageMeter()
    dices_2s = AverageMeter()
    model.train()

    for i, (input, target) in tqdm(enumerate(train_loader),
                                   total=len(train_loader)):

        input = input.cuda()
        target = target.cuda()

        # compute output
        if args.deepsupervision:
            outputs = model(input)
            loss = 0
            for output in outputs:
                loss += criterion(output, target)
            loss /= len(outputs)
            # score metrics on the final (deepest) output
            iou = iou_score(outputs[-1], target)
            dice_1, dice_2 = dice_coef(outputs[-1], target)[:2]
        else:
            output = model(input)
            loss = criterion(output, target)
            iou = iou_score(output, target)
            dice_1, dice_2 = dice_coef(output, target)[:2]

        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))
        dices_1s.update(float(dice_1), input.size(0))
        dices_2s.update(float(dice_2), input.size(0))

        # compute gradient and do optimizing step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    log = OrderedDict([('loss', losses.avg), ('iou', ious.avg),
                       ('dice_1', dices_1s.avg), ('dice_2', dices_2s.avg)])

    return log
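
Both this example and Example #3 rely on an AverageMeter helper that is not shown here. A minimal sketch, assuming the conventional semantics implied by .update(value, n) and .avg (an assumption, not the example's actual code):

class AverageMeter:
    """Running weighted average of a metric (minimal sketch)."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        # `value` is a batch-mean metric; weight it by the batch size `n`.
        self.sum += float(value) * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)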
Example #2
def compute_ious(df, max_dist=10):
    """
    Computes ious between boxes. If boxes are too far the iou is set to 0.

    Args:
        df (pandas dataframe): Predicted boxes.
        max_dist (int, optional): Maximum frame distance to compute iou for. Defaults to 10.

    Returns:
        np array [len(df) x len(df)]: ious between boxes.
    """
    ious = np.zeros((len(df), len(df)))
    for i in range(len(df)):
        for j in range(i, len(df)):
            frames = df["frame"].values[[i, j]]
            if np.abs(frames[0] - frames[1]) > max_dist:
                continue

            try:
                boxes = df[["left", "width", "top", "height"]].values[[i, j]]
            except KeyError:
                boxes = df[["x", "w", "y", "h"]].values[[i, j]]

            # [left, width, top, height] -> [left, top, right, bottom]
            boxes[:, 1] += boxes[:, 0]
            boxes[:, 3] += boxes[:, 2]
            boxes = boxes[:, [0, 2, 1, 3]]

            iou = iou_score(boxes[0], boxes[1])
            ious[i, j] = iou
            ious[j, i] = iou
    return ious
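
After the in-place conversion above, each row of `boxes` is in corner format [left, top, right, bottom]. A plausible iou_score for that format (a sketch under that assumption; the example's actual helper is not shown):

import numpy as np

def iou_score(box_a, box_b, eps=1e-7):
    """IoU of two corner-format boxes [left, top, right, bottom] (sketch)."""
    # Intersection rectangle; empty intersections clamp to zero area.
    left = max(box_a[0], box_b[0])
    top = max(box_a[1], box_b[1])
    right = min(box_a[2], box_b[2])
    bottom = min(box_a[3], box_b[3])
    inter = max(0, right - left) * max(0, bottom - top)

    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + eps)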
Example #3
def validate(args, val_loader, model, criterion):
    losses = AverageMeter()
    ious = AverageMeter()
    dices_1s = AverageMeter()
    dices_2s = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(val_loader),
                                       total=len(val_loader)):
            input = input.cuda()
            target = target.cuda()

            # compute output
            if args.deepsupervision:
                outputs = model(input)
                loss = 0
                for output in outputs:
                    loss += criterion(output, target)
                loss /= len(outputs)
                # score metrics on the final (deepest) output
                iou = iou_score(outputs[-1], target)
                dice_1, dice_2 = dice_coef(outputs[-1], target)[:2]
            else:
                output = model(input)
                loss = criterion(output, target)
                iou = iou_score(output, target)
                dice_1, dice_2 = dice_coef(output, target)[:2]

            losses.update(loss.item(), input.size(0))
            ious.update(iou, input.size(0))
            dices_1s.update(float(dice_1), input.size(0))
            dices_2s.update(float(dice_2), input.size(0))

    log = OrderedDict([('loss', losses.avg), ('iou', ious.avg),
                       ('dice_1', dices_1s.avg), ('dice_2', dices_2s.avg)])

    return log
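
A sketch of how the train and validate functions above might be driven per epoch (args.epochs, the loaders, and the checkpoint path are hypothetical):

# Hypothetical driver; model, criterion, optimizer, and the data loaders
# are assumed to be constructed elsewhere.
best_iou = 0.0
for epoch in range(args.epochs):
    train_log = train(args, train_loader, model, criterion, optimizer, epoch)
    val_log = validate(args, val_loader, model, criterion)
    print('epoch %d - train loss %.4f - val iou %.4f' %
          (epoch, train_log['loss'], val_log['iou']))

    if val_log['iou'] > best_iou:
        best_iou = val_log['iou']
        torch.save(model.state_dict(), 'model_best.pth')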
Example #4
    def __call__(self, features, labels, mode, params):

        if "debug_verbosity" not in params.keys():
            raise RuntimeError("Parameter `debug_verbosity` is missing...")

        if mode == tf.estimator.ModeKeys.TRAIN:

            if "rmsprop_decay" not in params.keys():
                raise RuntimeError("Parameter `rmsprop_decay` is missing...")

            if "rmsprop_momentum" not in params.keys():
                raise RuntimeError(
                    "Parameter `rmsprop_momentum` is missing...")

            if "learning_rate" not in params.keys():
                raise RuntimeError("Parameter `learning_rate` is missing...")

            if "learning_rate_decay_steps" not in params.keys():
                raise RuntimeError("Parameter `learning_rate` is missing...")

            if "learning_rate_decay_factor" not in params.keys():
                raise RuntimeError("Parameter `learning_rate` is missing...")

            if "weight_decay" not in params.keys():
                raise RuntimeError("Parameter `weight_decay` is missing...")

            if "loss_fn_name" not in params.keys():
                raise RuntimeError("Parameter `loss_fn_name` is missing...")

        if mode == tf.estimator.ModeKeys.PREDICT:
            y_pred, y_pred_logits = self.build_model(
                features,
                training=False,
                reuse=False,
                debug_verbosity=params["debug_verbosity"])

            predictions = {'logits': y_pred}
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        input_image, mask_image = features

        with tf.device("/gpu:0"):

            tf.identity(input_image, name="input_image_ref")
            tf.identity(mask_image, name="mask_image_ref")
            tf.identity(labels, name="labels_ref")

            y_pred, y_pred_logits = self.build_model(
                input_image,
                training=mode == tf.estimator.ModeKeys.TRAIN,
                reuse=False,
                debug_verbosity=params["debug_verbosity"])

            all_trainable_vars = tf.reduce_sum(
                [tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
            tf.identity(all_trainable_vars,
                        name='trainable_parameters_count_ref')

            if mode == tf.estimator.ModeKeys.EVAL:
                eval_metrics = dict()

            # ==================== Samples ==================== #

            image_uint8 = tf.cast((input_image + 1) * 127.5, dtype=tf.uint8)
            input_image_jpeg = tf.image.encode_jpeg(image_uint8[0],
                                                    format='grayscale',
                                                    quality=100)
            tf.identity(input_image_jpeg, name="input_image_jpeg_ref")

            for threshold in [
                    None, 0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
            ]:
                binarize_img, binarize_img_jpeg = image_processing.binarize_output(
                    y_pred[0], threshold=threshold)

                tf.identity(binarize_img_jpeg,
                            name="output_sample_ths_%s_ref" % threshold)
                tf.summary.image('output_sample_ths_%s' % threshold,
                                 binarize_img, 10)

            # ==================== Evaluation Metrics ==================== #

            with tf.name_scope("IoU_Metrics"):

                for threshold in [
                        0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
                ]:

                    iou_score = metrics.iou_score(y_pred=y_pred,
                                                  y_true=mask_image,
                                                  threshold=threshold)

                    tf.identity(iou_score,
                                name='iou_score_ths_%s_ref' % threshold)
                    tf.summary.scalar('iou_score_ths_%s' % threshold,
                                      iou_score)

                    if mode == tf.estimator.ModeKeys.EVAL:
                        eval_metrics["IoU_THS_%s" %
                                     threshold] = tf.metrics.mean(iou_score)

            labels = tf.cast(labels, tf.float32)
            labels_preds = tf.reduce_max(y_pred, axis=(1, 2, 3))

            # Verify in-graph that clipping labels_preds to [0, 1] only
            # changes values by a negligible amount.
            clip_assert = tf.debugging.assert_near(
                labels_preds,
                tf.clip_by_value(labels_preds, 0, 1),
                atol=1e-5,
                message="Clipping labels_preds introduces non-trivial loss.")
            with tf.control_dependencies([clip_assert]):
                labels_preds = tf.clip_by_value(labels_preds, 0, 1)

            with tf.variable_scope("Confusion_Matrix") as scope:

                tp, update_tp = tf.metrics.true_positives_at_thresholds(
                    labels=labels,
                    predictions=labels_preds,
                    thresholds=[
                        0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
                    ],
                )

                tn, update_tn = tf.metrics.true_negatives_at_thresholds(
                    labels=labels,
                    predictions=labels_preds,
                    thresholds=[
                        0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
                    ],
                )

                fp, update_fp = tf.metrics.false_positives_at_thresholds(
                    labels=labels,
                    predictions=labels_preds,
                    thresholds=[
                        0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
                    ],
                )

                fn, update_fn = tf.metrics.false_negatives_at_thresholds(
                    labels=labels,
                    predictions=labels_preds,
                    thresholds=[
                        0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99
                    ],
                )

                if mode == tf.estimator.ModeKeys.TRAIN:
                    local_vars = tf.get_collection(
                        tf.GraphKeys.LOCAL_VARIABLES, scope=scope.name)
                    confusion_matrix_reset_op = tf.initializers.variables(
                        local_vars, name='reset_op')

                    with tf.control_dependencies([confusion_matrix_reset_op]):
                        with tf.control_dependencies(
                            [update_tp, update_tn, update_fp, update_fn]):
                            tp = tf.identity(tp)
                            tn = tf.identity(tn)
                            fp = tf.identity(fp)
                            fn = tf.identity(fn)

                else:
                    eval_metrics["Confusion_Matrix_TP"] = tp, update_tp
                    eval_metrics["Confusion_Matrix_TN"] = tn, update_tn
                    eval_metrics["Confusion_Matrix_FP"] = fp, update_fp
                    eval_metrics["Confusion_Matrix_FN"] = fn, update_fn

                tf.identity(tp, name='true_positives_ref'
                            )  # Confusion_Matrix/true_positives_ref:0
                tf.identity(tn, name='true_negatives_ref'
                            )  # Confusion_Matrix/true_negatives_ref:0
                tf.identity(fp, name='false_positives_ref'
                            )  # Confusion_Matrix/false_positives_ref:0
                tf.identity(fn, name='false_negatives_ref'
                            )  # Confusion_Matrix/false_negatives_ref:0

                tf.summary.scalar('true_positives', tp[3])  # For Ths = 0.5
                tf.summary.scalar('true_negatives', tn[3])  # For Ths = 0.5
                tf.summary.scalar('false_positives', fp[3])  # For Ths = 0.5
                tf.summary.scalar('false_negatives', fn[3])  # For Ths = 0.5

            binarized_mask, binarized_mask_jpeg = image_processing.binarize_output(
                mask_image[0], threshold=0.5)
            tf.identity(binarized_mask_jpeg, name="mask_sample_ref")
            tf.summary.image('sample_mask', binarized_mask, 10)

            ##########################

            mask_max_val = tf.reduce_max(mask_image)
            tf.identity(mask_max_val, name='mask_max_val_ref')

            mask_min_val = tf.reduce_min(mask_image)
            tf.identity(mask_min_val, name='mask_min_val_ref')

            mask_mean_val = tf.reduce_mean(mask_image)
            tf.identity(mask_mean_val, name='mask_mean_val_ref')

            mask_std_val = tf.math.reduce_std(mask_image)
            tf.identity(mask_std_val, name='mask_std_val_ref')

            ##########################

            output_max_val = tf.reduce_max(y_pred)
            tf.identity(output_max_val, name='output_max_val_ref')

            output_min_val = tf.reduce_min(y_pred)
            tf.identity(output_min_val, name='output_min_val_ref')

            output_mean_val = tf.reduce_mean(y_pred)
            tf.identity(output_mean_val, name='output_mean_val_ref')

            output_std_val = tf.math.reduce_std(y_pred)
            tf.identity(output_std_val, name='output_std_val_ref')

            with tf.variable_scope("losses"):

                # ==================== Reconstruction Loss ==================== #

                if params["loss_fn_name"] == "x-entropy":
                    reconstruction_loss = losses.reconstruction_x_entropy(
                        y_pred=y_pred, y_true=mask_image)

                elif params["loss_fn_name"] == "l2_loss":
                    reconstruction_loss = losses.reconstruction_l2loss(
                        y_pred=y_pred, y_true=mask_image)

                elif params["loss_fn_name"] == "dice_sorensen":
                    reconstruction_loss = 1 - losses.dice_coe(
                        y_pred=y_pred, y_true=mask_image, loss_type='sorensen')

                elif params["loss_fn_name"] == "dice_jaccard":
                    reconstruction_loss = 1 - losses.dice_coe(
                        y_pred=y_pred, y_true=mask_image, loss_type='jaccard')

                elif params["loss_fn_name"] == "adaptive_loss":
                    reconstruction_loss = losses.adaptive_loss(
                        y_pred=y_pred,
                        y_pred_logits=y_pred_logits,
                        y_true=mask_image,
                        switch_at_threshold=0.3,
                        loss_type='sorensen')

                else:
                    raise ValueError("Unknown loss function received: %s" %
                                     params["loss_fn_name"])

                tf.identity(reconstruction_loss,
                            name='reconstruction_loss_ref')
                tf.summary.scalar('reconstruction_loss', reconstruction_loss)

                if mode == tf.estimator.ModeKeys.TRAIN:

                    # ==================== Regularization Loss ==================== #

                    l2_loss = losses.regularization_l2loss(
                        weight_decay=params["weight_decay"])

                    tf.identity(l2_loss, name='l2_loss_ref')
                    tf.summary.scalar('l2_loss', l2_loss)

                    total_loss = tf.add(reconstruction_loss,
                                        l2_loss,
                                        name="total_loss")

                else:
                    total_loss = reconstruction_loss

                tf.identity(total_loss, name='total_loss_ref')
                tf.summary.scalar('total_loss', total_loss)

            if mode == tf.estimator.ModeKeys.TRAIN:

                with tf.variable_scope("optimizers"):

                    # Update Global Step
                    global_step = tf.train.get_or_create_global_step()
                    tf.identity(global_step, name="global_step_ref")

                    learning_rate = tf.train.exponential_decay(
                        learning_rate=params["learning_rate"],
                        decay_steps=params["learning_rate_decay_steps"],
                        decay_rate=params["learning_rate_decay_factor"],
                        global_step=global_step,
                        staircase=True)

                    tf.identity(learning_rate, name="learning_rate_ref")
                    tf.summary.scalar('learning_rate', learning_rate)

                    opt = tf.train.RMSPropOptimizer(
                        learning_rate=learning_rate,
                        use_locking=False,
                        centered=True,
                        decay=params["rmsprop_decay"],
                        momentum=params["rmsprop_momentum"],
                    )

                    if hvd_utils.is_using_hvd():
                        # Apply gradient compression using GRACE.
                        from grace_dl.tensorflow.communicator.allgather import Allgather
                        from grace_dl.tensorflow.compressor.topk import TopKCompressor
                        from grace_dl.tensorflow.memory.residual import ResidualMemory

                        world_size = hvd.size()
                        grc = Allgather(TopKCompressor(0.3), ResidualMemory(),
                                        world_size)
                        opt = hvd.DistributedOptimizer(opt,
                                                       grace=grc,
                                                       device_dense='/gpu:0')

                    if params["apply_manual_loss_scaling"]:

                        # if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
                        #     Logger.log("Applying manual Loss Scaling ...")

                        loss_scale_manager = tf.contrib.mixed_precision.ExponentialUpdateLossScaleManager(
                            init_loss_scale=2**32,  # 4,294,967,296
                            incr_every_n_steps=1000)
                        opt = tf.contrib.mixed_precision.LossScaleOptimizer(
                            opt, loss_scale_manager)

                    deterministic = True
                    gate_gradients = (tf.train.Optimizer.GATE_OP
                                      if deterministic else
                                      tf.train.Optimizer.GATE_NONE)

                    backprop_op = opt.minimize(total_loss,
                                               gate_gradients=gate_gradients,
                                               global_step=global_step)

                    train_op = tf.group(
                        backprop_op,
                        tf.get_collection(tf.GraphKeys.UPDATE_OPS))

                    return tf.estimator.EstimatorSpec(
                        mode,
                        loss=total_loss,
                        train_op=train_op,
                    )

            elif mode == tf.estimator.ModeKeys.EVAL:

                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=total_loss,
                    eval_metric_ops=eval_metrics,
                    predictions={"output": y_pred})

            else:
                raise NotImplementedError('Unknown mode {}'.format(mode))
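
Since __call__(features, labels, mode, params) follows the model_fn signature, an instance of the class can be handed straight to tf.estimator.Estimator. A usage sketch (the class name UNetModel, the model_dir, and the parameter values are hypothetical; the keys match those validated above):

# Hypothetical wiring for the model function defined above.
model_fn = UNetModel()

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="./results",
    params={
        "debug_verbosity": 0,
        "rmsprop_decay": 0.9,
        "rmsprop_momentum": 0.8,
        "learning_rate": 1e-4,
        "learning_rate_decay_steps": 12000,
        "learning_rate_decay_factor": 0.8,
        "weight_decay": 1e-5,
        "loss_fn_name": "adaptive_loss",
        "apply_manual_loss_scaling": False,
    },
)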
Example #5
def train(model,
          data_loader,
          criterion,
          optimizer,
          scheduler,
          num_epochs=5,
          epochs_earlystopping=10):
    logdir = os.path.join('./logs', time.strftime("%Y%m%d_%H%M%S"))
    pathlib.Path(logdir).mkdir(parents=True, exist_ok=True)
    tb_writer = SummaryWriter(log_dir=logdir)

    best_acc = 0.0
    best_loss = sys.float_info.max
    best_iou = 0.0

    # epochs since the last validation-IoU improvement
    early_stopping = 0

    for epoch in range(num_epochs):
        result = []
        early_stopping += 1

        for phase in ['train', 'val']:
            if phase == 'train':  # put the model in training mode
                model.train()
            else:
                # put the model in validation mode
                model.eval()

            # keep track of training and validation loss
            batch_nums = 0
            running_loss = 0.0
            running_iou = 0.0
            running_corrects = 0.0

            for (data, labels) in data_loader[phase]:
                # load the data and target to respective device
                (data, labels) = (data.to(device), labels.to(device))

                with torch.set_grad_enabled(phase == 'train'):
                    # feed the input
                    output = model(data)

                    # calculate the loss
                    loss = criterion(output, labels)

                    if phase == 'train':
                        # backward pass: compute gradient of the loss with respect to model parameters
                        loss.backward()

                        optimizer.step()

                        # zero the grad to stop it from accumulating
                        optimizer.zero_grad()

                # statistics
                batch_nums += 1
                running_loss += loss.item()
                running_iou += iou_score(output, labels)
                running_corrects += multi_acc(output, labels)

            if phase == 'train':
                # assumes a metric-driven scheduler, e.g. ReduceLROnPlateau
                scheduler.step(running_iou)

            # epoch statistics
            epoch_loss = running_loss / batch_nums
            epoch_iou = running_iou / batch_nums
            epoch_acc = running_corrects / batch_nums

            result.append('{} Loss: {:.4f} Acc: {:.4f} IoU: {:.4f}'.format(
                phase, epoch_loss, epoch_acc, epoch_iou))

            tb_writer.add_scalar('Loss/' + phase, epoch_loss, epoch)
            tb_writer.add_scalar('IoU/' + phase, epoch_iou, epoch)
            tb_writer.add_scalar('Accuracy/' + phase, epoch_acc, epoch)

            if phase == 'val' and epoch_iou > best_iou:
                early_stopping = 0

                best_acc = epoch_acc
                best_loss = epoch_loss
                best_iou = epoch_iou
                saveCheckpoint(CHECKPOINT_PATH, epoch, model, optimizer,
                               BATCH_SIZE)
                print(
                    'Checkpoint saved - Loss: {:.4f} Acc: {:.4f} IoU: {:.4f}'.
                    format(epoch_loss, epoch_acc, epoch_iou))

        print(result)

        if early_stopping >= epochs_earlystopping:
            break

    tb_writer.close()

    print('-----------------------------------------')
    print('Final Result: Loss: {:.4f} Acc: {:.4f} IoU: {:.4f}'.format(
        best_loss, best_acc, best_iou))
    print('-----------------------------------------')
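
This example calls a saveCheckpoint helper along with the globals CHECKPOINT_PATH and BATCH_SIZE that are defined elsewhere. A minimal sketch of such a helper, assuming the usual torch.save checkpoint-dict convention (an assumption, not the example's actual code):

import torch

def saveCheckpoint(path, epoch, model, optimizer, batch_size):
    # Persist everything needed to resume training from this epoch.
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'batch_size': batch_size,
        },
        path,
    )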
Example #6
    def test_iou_score(self):
        a = np.array([1, 0, 0, 1])
        b = np.array([1, 1, 0, 0])
        iou = metrics.iou_score(a, b)
        expected = 1 / 3  # intersection = 1 element, union = 3 elements
        np.testing.assert_almost_equal(iou, expected, decimal=3)
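
For reference, a minimal binary-mask iou_score that satisfies this test (a sketch; not necessarily the actual implementation in `metrics`): with a = [1, 0, 0, 1] and b = [1, 1, 0, 0], the masks overlap in 1 position and cover 3 positions in total, hence 1/3.

import numpy as np

def iou_score(y_pred, y_true, eps=1e-7):
    # Intersection-over-Union for binary masks (minimal sketch).
    y_pred = np.asarray(y_pred).astype(bool)
    y_true = np.asarray(y_true).astype(bool)
    intersection = np.logical_and(y_pred, y_true).sum()
    union = np.logical_or(y_pred, y_true).sum()
    return intersection / (union + eps)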