Example #1
def main():
    """
    Starting point of the application
    """
    hvd.init()
    params = parse_args(PARSER.parse_args())
    set_flags(params)
    model_dir = prepare_model_dir(params)
    params.model_dir = model_dir
    logger = get_logger(params)

    model = Unet()

    dataset = Dataset(data_dir=params.data_dir,
                      batch_size=params.batch_size,
                      fold=params.fold,
                      augment=params.augment,
                      gpu_id=hvd.rank(),
                      num_gpus=hvd.size(),
                      seed=params.seed)

    if 'train' in params.exec_mode:
        train(params, model, dataset, logger)

    if 'evaluate' in params.exec_mode:
        if hvd.rank() == 0:
            evaluate(params, model, dataset, logger)

    if 'predict' in params.exec_mode:
        if hvd.rank() == 0:
            predict(params, model, dataset, logger)
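
The evaluate and predict branches above are guarded with hvd.rank() == 0 so that only one worker writes results while every worker trains data-parallel. A minimal standalone sketch of that Horovod pattern (illustrative only, using just the public Horovod API):

import horovod.tensorflow as hvd

hvd.init()

# All workers train; the dataset is sharded by rank (see gpu_id/num_gpus above).
print('worker %d of %d' % (hvd.rank(), hvd.size()))

# Side effects such as evaluation or prediction run on rank 0 only,
# so workers do not race on the same output files.
if hvd.rank() == 0:
    pass  # evaluate / predict / save checkpoints here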
Example #2
    def __init__(self, kwargs):
        super(DepthRegressorTrainer, self).__init__()
        self.hparams = kwargs

        if self.hparams.resize_input:
            self.unet = Unet(channels_in=3, channels_out=1)
        else:
            self.unet = UNetMini(channels_in=3, channels_out=1)

        self.dataset = lambda split: ScenesDataset(
            split, self.hparams.datasetdir, self.hparams.splitsdir, kwargs)
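
The self.dataset factory takes a split name, suggesting a PyTorch Lightning-style module in which each dataloader hook builds its own split. A hedged sketch of how such a factory is typically consumed (batch_size on hparams is an assumption):

    # Inside the same class; DataLoader comes from torch.utils.data.
    def train_dataloader(self):
        return DataLoader(self.dataset('train'),
                          batch_size=self.hparams.batch_size,
                          shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.dataset('val'),
                          batch_size=self.hparams.batch_size,
                          shuffle=False)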
Example #3
    def train_model(self):
        '''
        Train a Unet model
        '''
        from model.unet import Unet
        # prepare the Unet model
        _unet = Unet(self.img_height, self.img_width, self.img_channels)
        inputs, outputs = _unet.model()
        model = Model(inputs=[inputs], outputs=[outputs])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=[my_iou_metric])
        model.summary()

        ### FIT
        # Initialize our callbacks
        model_path = self.model_output_path
        checkpoint = ModelCheckpoint(model_path,
                                     monitor="val_loss",
                                     mode="min",
                                     save_best_only=True,
                                     verbose=1)
        print('Crossed checkpoint')
        earlystop = EarlyStopping(monitor='val_loss',
                                  min_delta=0,
                                  patience=5,
                                  verbose=1)
        # restore_best_weights=True can also be passed to EarlyStopping
        # Fit the model (X_train and Y_train are assumed to be prepared elsewhere)
        results = model.fit(X_train,
                            Y_train,
                            validation_split=0.1,
                            batch_size=16,
                            epochs=10,
                            callbacks=[earlystop, checkpoint])
        return model_path
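
The compile step references my_iou_metric, which is not defined in the snippet. A common way to write a thresholded IoU metric with the Keras backend, shown as an illustrative sketch rather than the author's exact definition:

from tensorflow.keras import backend as K

def my_iou_metric(y_true, y_pred, threshold=0.5, smooth=1e-6):
    # Binarize the predictions, then compute intersection over union.
    y_pred = K.cast(y_pred > threshold, 'float32')
    intersection = K.sum(y_true * y_pred)
    union = K.sum(y_true) + K.sum(y_pred) - intersection
    return (intersection + smooth) / (union + smooth)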
Example #4
def main():
    """
    Starting point of the application
    """
    params = parse_args(description="UNet-medical")
    if params.use_horovod:
        hvd_init()
    set_flags(params)

    model_dir = prepare_model_dir(params)
    params.model_dir = model_dir
    logger = get_logger(params)

    tb_logger = None
    if params.tensorboard_logging:
        log_dir = params.log_dir
        if horovod_enabled() and params.log_all_workers:
            log_dir = os.path.join(log_dir, f'worker_{hvd_rank()}')
        tb_logger = namedtuple('TBSummaryWriters', 'train_writer eval_writer')(
            tf.summary.create_file_writer(log_dir),
            tf.summary.create_file_writer(os.path.join(log_dir, 'eval')))

    model = Unet()

    dataset = Dataset(data_dir=params.data_dir,
                      batch_size=params.batch_size,
                      fold=params.fold,
                      augment=params.augment,
                      hpu_id=hvd_rank() if horovod_enabled() else 0,
                      num_hpus=hvd_size() if horovod_enabled() else 1,
                      seed=params.seed)

    if 'train' in params.exec_mode:
        with dump_callback(params.dump_config):
            train(params, model, dataset, logger, tb_logger)

    if 'evaluate' in params.exec_mode:
        evaluate(params, model, dataset, logger, tb_logger)

    if 'predict' in params.exec_mode:
        predict(params, model, dataset, logger)
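
The tb_logger namedtuple simply bundles two tf.summary file writers; downstream code would log by entering a writer's context. A minimal sketch using the standard TF2 summary API (the metric names are placeholders):

# Assuming tb_logger was built as above:
with tb_logger.train_writer.as_default():
    tf.summary.scalar('loss', 0.123, step=0)
with tb_logger.eval_writer.as_default():
    tf.summary.scalar('dice', 0.9, step=0)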
Example #5
    def __init__(self, kwargs):
        super(SceneNetTrainer, self).__init__()
        self.hparams = kwargs
        self.ifnet = IFNet()
        self.kernel_size = self.hparams.kernel_size

        self.dims = torch.tensor([139, 104, 112], device=self.device)
        self.dims = (self.dims / self.hparams.scale_factor).round().long()

        self.project = project(self.dims, self.kernel_size,
                               torch.tensor(self.hparams.sigma))
        if self.hparams.resize_input:
            self.unet = Unet(channels_in=3, channels_out=1)
        else:
            self.unet = UNetMini(channels_in=3, channels_out=1)

        if self.hparams.skip_unet:
            self.unet = None

        self.dataset = lambda split: scene_net_data(
            split, self.hparams.datasetdir, self.hparams.num_points,
            self.hparams.splitsdir, self.hparams)
Example #6
import datetime
import os

import tensorflow as tf

from util.load_cfg import train_cfg, dataset_cfg, sample_cfg
from util.dice import *
from model.unet import Unet
from data.train import dataloader

checkpoint_dir = train_cfg["checkpoint_dir"]
log_dir = train_cfg["log_dir"]

if not os.path.exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)

if not os.path.exists(log_dir):
    os.mkdir(log_dir)

model = Unet(sample_cfg["patch_size"])

# Learning rate schedule and optimizer
cosine_decay = tf.keras.experimental.CosineDecayRestarts(
    initial_learning_rate=train_cfg["init_lr"],
    first_decay_steps=12000,
    t_mul=1000,
    m_mul=0.5,
    alpha=1e-5)
optimizer = tf.keras.optimizers.Adam(learning_rate=cosine_decay)

# Loss function
#loss=tf.keras.losses.BinaryCrossentropy(from_logits=False)

# Metric recorders
train_loss = tf.keras.metrics.Mean(name='train_loss')
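
The snippet sets up the schedule, optimizer, and a running-mean loss metric but cuts off before the training step. A hedged sketch of a typical custom step tying them together, assuming model is a callable Keras model and that util.dice provides a dice-style loss (named dice_loss here for illustration):

@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = dice_loss(labels, predictions)  # assumed helper from util.dice
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)  # accumulate the running mean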
Example #7

if __name__ == '__main__':
    images, labels = create_inputs_seg_hand(is_train=True)
    # tf.reshape(labels)
    session_config = tf.ConfigProto(
        device_count={'GPU': 0},
        gpu_options={
            'allow_growth': 1,
            # 'per_process_gpu_memory_fraction': 0.1,
            'visible_device_list': '0'
        },
        allow_soft_placement=True)  # This setting is required; otherwise a cuDNN mismatch error is raised no matter what. A very subtle bug.
    with tf.Session(config=session_config) as sess:
        # 1. The model must be defined before the initialization in step 2
        model = Unet(sess, cfg, is_train=True, size=(128, 128), l2_reg=0.0001)

        # 2. Initialize variables and start the input queue threads
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        if model_restore_name:
            model.restore(model_restore_name)

        # 3. Train the model
        # num_epochs=10000
        for i in range(start_epoch, num_epochs):
            since = time.time()
            # 1. Read a batch of images
            pics, pics_masks = sess.run([images, labels])  # fetch one batch of images
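
The example is truncated inside the epoch loop. With TF1 queue runners, the loop is normally followed by a coordinated shutdown; the standard teardown would be:

        # After the training loop: stop and join the input threads cleanly.
        coord.request_stop()
        coord.join(threads)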
Example #8
os.makedirs(predict_pics_save, exist_ok=True)

if __name__ == '__main__':
    images, labels = create_inputs_seg_hand(is_train=is_train)

    session_config = tf.ConfigProto(
        device_count={'GPU': 0},
        gpu_options={
            'allow_growth': 1,
            # 'per_process_gpu_memory_fraction': 0.1,
            'visible_device_list': '0'
        },
        allow_soft_placement=True)  # This setting is required; otherwise a cuDNN mismatch error is raised no matter what. A very subtle bug.
    with tf.Session(config=session_config) as sess:
        # 1. The model must be defined before the initialization in step 2
        model = Unet(sess, cfg, is_train=is_train)

        # 2. Initialize variables and start the input queue threads
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        model.restore(model_restore_name)

        # 3. Run inference on the test images
        index = 0
        for i in range(test_data_number // batch_size):
            pics, pics_masks = sess.run([images, labels])  # fetch one batch of images
            pics = pics / 255
            # measure elapsed time
            since = time.time()
Example #9
if __name__ == "__main__":
    img_size = 256
    batch_size = 32
    train_path = './data/stage1_train/'
    test_path = './data/stage1_test/'

    X_train, Y_train, X_test, sizes_test = make_df(train_path, test_path,
                                                   img_size)
    xtr, xval, ytr, yval = train_test_split(X_train,
                                            Y_train,
                                            test_size=0.1,
                                            random_state=7)
    train_generator, val_generator = generator(xtr, xval, ytr, yval,
                                               batch_size)

    model = Unet(img_size)
    model.compile(
        optimizer='adam',
        loss=bce_dice_loss,
        metrics=[bce_dice_loss, recall_score, precision_score, rocauc_score])
    ckpt = ModelCheckpoint('.model.hdf5',
                           save_best_only=True,
                           monitor='val_rocauc_score',
                           mode='max')

    model.fit_generator(train_generator,
                        steps_per_epoch=len(xtr) / 6,
                        epochs=50,
                        validation_data=val_generator,
                        validation_steps=len(xval) / batch_size,
                        callbacks=[ckpt])
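
bce_dice_loss is referenced but not shown; a widely used formulation adds binary cross-entropy to (1 - Dice). An illustrative sketch, not necessarily the author's exact definition:

from tensorflow.keras import backend as K
from tensorflow.keras.losses import binary_crossentropy

def dice_coef(y_true, y_pred, smooth=1.0):
    intersection = K.sum(y_true * y_pred)
    return (2.0 * intersection + smooth) / (K.sum(y_true) + K.sum(y_pred) + smooth)

def bce_dice_loss(y_true, y_pred):
    # Cross-entropy drives pixel-wise accuracy; the Dice term counters class imbalance.
    return binary_crossentropy(y_true, y_pred) + (1.0 - dice_coef(y_true, y_pred))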
Example #10
def main():
    """
    Starting point of the application
    """

    flags = PARSER.parse_args()
    params = _cmd_params(flags)

    backends = [StdOutBackend(Verbosity.VERBOSE)]
    if params.log_dir is not None:
        backends.append(JSONStreamBackend(Verbosity.VERBOSE, params.log_dir))
    logger = Logger(backends)

    # Optimization flags
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    os.environ['TF_ADJUST_HUE_FUSED'] = '1'
    os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'

    hvd.init()

    if params.use_xla:
        tf.config.optimizer.set_jit(True)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')

    if params.use_amp:
        tf.keras.mixed_precision.experimental.set_policy('mixed_float16')
    else:
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0'

    # Build the model
    model = Unet()

    dataset = Dataset(data_dir=params.data_dir,
                      batch_size=params.batch_size,
                      fold=params.crossvalidation_idx,
                      augment=params.augment,
                      gpu_id=hvd.rank(),
                      num_gpus=hvd.size(),
                      seed=params.seed)

    if 'train' in params.exec_mode:
        train(params, model, dataset, logger)

    if 'evaluate' in params.exec_mode:
        if hvd.rank() == 0:
            model = restore_checkpoint(model, params.model_dir)
            evaluate(params, model, dataset, logger)

    if 'predict' in params.exec_mode:
        if hvd.rank() == 0:
            model = restore_checkpoint(model, params.model_dir)
            predict(params, model, dataset, logger)
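
Note that tf.keras.mixed_precision.experimental.set_policy is the pre-2.4 API; on current TensorFlow the equivalent call is:

tf.keras.mixed_precision.set_global_policy('mixed_float16')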
Example #11
from util import trainer
from data.train_data_provider import ImageDataProvider
from model.unet import Unet
output_path = "/data/Cell/unet/model3/"
data_provider = ImageDataProvider("/data/Cell/unet/*.jpg")

net = Unet(layers=3, features_root=32, channels=3, n_class=2)
trainer = trainer.Trainer(net, optimizer="adam")
path = trainer.train(data_provider, output_path, training_iters=32, epochs=100)
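
This mirrors the tf_unet-style trainer API, in which train() returns the checkpoint path. Assuming the local model.unet.Unet follows the same interface, inference would look roughly like:

# Hypothetical follow-up, assuming a tf_unet-like predict signature:
x_test, _ = data_provider(1)            # draw one batch from the provider
prediction = net.predict(path, x_test)  # restore weights from path, then run inference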
Example #12
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    if 'deeplab' in args.model_name:
        if 'resnet101' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride,
                                backbone_type='resnet101')
        elif 'resnet50' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride,
                                backbone_type='resnet50')
        elif 'resnet34' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3, n_classes=args.num_classes, os=args.output_stride,
                                backbone_type='resnet34')
    elif 'unet' in args.model_name:
        net = Unet(in_ch=3, out_ch=1)
    elif 'trfe' in args.model_name:
        if args.model_name == 'trfe':
            net = TRFENet(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe1':
            net = TRFENet1(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe2':
            net = TRFENet2(in_ch=3, out_ch=1)
    elif 'mtnet' in args.model_name:
        net = MTNet(in_ch=3, out_ch=1)
    elif 'segnet' in args.model_name:
        net = SegNet(input_channels=3, output_channels=1)
    elif 'fcn' in args.model_name:
        net = FCN8s(1)
    else:
        raise NotImplementedError
    net.load_state_dict(torch.load(args.load_path))
    net.cuda()

    composed_transforms_ts = transforms.Compose([
        trforms.FixedResize(size=(args.input_size, args.input_size)),
        trforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        trforms.ToTensor()])

    if args.test_dataset == 'TN3K':
        test_data = tn3k.TN3K(mode='test', transform=composed_transforms_ts, return_size=True)

    save_dir = args.save_dir + args.test_fold + '-' + args.test_dataset + os.sep + args.model_name + os.sep
    testloader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=0)
    num_iter_ts = len(testloader)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    net.eval()
    start_time = time.time()
    with torch.no_grad():
        total_iou = 0
        for sample_batched in tqdm(testloader):
            inputs, labels, label_name, size = (sample_batched['image'],
                                                sample_batched['label'],
                                                sample_batched['label_name'],
                                                sample_batched['size'])
            inputs = Variable(inputs, requires_grad=False)
            labels = Variable(labels)
            labels = labels.cuda()
            inputs = inputs.cuda()
            if 'trfe' in args.model_name or 'mtnet' in args.model_name:
                outputs, _ = net.forward(inputs)
            else:
                outputs = net.forward(inputs)
            prob_pred = torch.sigmoid(outputs)
            iou = utils.get_iou(prob_pred, labels)
            total_iou += iou

            shape = (size[0, 0], size[0, 1])
            prob_pred = F.interpolate(prob_pred, size=shape, mode='bilinear', align_corners=True).cpu().data
            save_data = prob_pred[0]
            save_png = save_data[0].numpy()
            save_png = np.round(save_png)
            save_png = save_png * 255
            save_png = save_png.astype(np.uint8)
            save_path = save_dir + label_name[0]
            if not os.path.exists(save_path[:save_path.rfind('/')]):
                os.makedirs(save_path[:save_path.rfind('/')])
            cv2.imwrite(save_path, save_png)

    print(args.model_name + ' iou:' + str(total_iou / len(testloader)))
    duration = time.time() - start_time
    print("-- %s contain %d images, cost time: %.4f s, speed: %.4f s." % (
        args.test_dataset, num_iter_ts, duration, duration / num_iter_ts))
    print("------------------------------------------------------------------")
Example #13
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__)))
    if args.resume_epoch != 0:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) if runs else 0
    else:
        runs = sorted(glob.glob(os.path.join(save_dir_root, 'run', 'run_*')))
        run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0

    if args.run_id >= 0:
        run_id = args.run_id

    save_dir = os.path.join(save_dir_root, 'run', 'run_' + str(run_id))
    log_dir = os.path.join(
        save_dir,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + socket.gethostname())
    writer = SummaryWriter(log_dir=log_dir)
    batch_size = args.batch_size

    if 'deeplab' in args.model_name:
        if 'resnet101' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3,
                                n_classes=args.num_classes,
                                os=args.output_stride,
                                backbone_type='resnet101')
        elif 'resnet50' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3,
                                n_classes=args.num_classes,
                                os=args.output_stride,
                                backbone_type='resnet50')
        elif 'resnet34' in args.model_name:
            net = Deeplabv3plus(nInputChannels=3,
                                n_classes=args.num_classes,
                                os=args.output_stride,
                                backbone_type='resnet34')
        else:
            raise NotImplementedError
    elif 'unet' in args.model_name:
        net = Unet(in_ch=3, out_ch=1)
    elif 'trfe' in args.model_name:
        if args.model_name == 'trfe1':
            net = TRFENet1(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe2':
            net = TRFENet2(in_ch=3, out_ch=1)
        elif args.model_name == 'trfe':
            net = TRFENet(in_ch=3, out_ch=1)
        batch_size = 4
    elif 'mtnet' in args.model_name:
        net = MTNet(in_ch=3, out_ch=1)
        batch_size = 4
    elif 'segnet' in args.model_name:
        net = SegNet(input_channels=3, output_channels=1)
    elif 'fcn' in args.model_name:
        net = FCN8s(1)
    else:
        raise NotImplementedError

    if args.resume_epoch == 0:
        print('Training ' + args.model_name + ' from scratch...')
    else:
        load_path = os.path.join(
            save_dir,
            args.model_name + '_epoch-' + str(args.resume_epoch) + '.pth')
        print('Initializing weights from: {}...'.format(load_path))
        net.load_state_dict(torch.load(load_path))

    if args.pretrain == 'THYROID':
        net.load_state_dict(
            torch.load('./pre_train/thyroid-pretrain.pth',
                       map_location=lambda storage, loc: storage))
        print('Loading pretrained model...')

    torch.cuda.set_device(device=0)
    net.cuda()

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)

    if args.criterion == 'Dice':
        criterion = soft_dice
    else:
        raise NotImplementedError

    composed_transforms_tr = transforms.Compose([
        trforms.FixedResize(size=(args.input_size, args.input_size)),
        trforms.RandomHorizontalFlip(),
        trforms.Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225)),
        trforms.ToTensor()
    ])

    composed_transforms_ts = transforms.Compose([
        trforms.FixedResize(size=(args.input_size, args.input_size)),
        trforms.Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225)),
        trforms.ToTensor()
    ])

    if args.dataset == 'TN3K':
        train_data = tn3k.TN3K(mode='train',
                               transform=composed_transforms_tr,
                               fold=args.fold)
        val_data = tn3k.TN3K(mode='val',
                             transform=composed_transforms_ts,
                             fold=args.fold)
    elif args.dataset == 'TG3K':
        train_data = tg3k.TG3K(mode='train', transform=composed_transforms_tr)
        val_data = tg3k.TG3K(mode='val', transform=composed_transforms_ts)
    elif args.dataset == 'TATN':
        train_data = tatn.TATN(mode='train',
                               transform=composed_transforms_tr,
                               fold=args.fold)
        val_data = tatn.TATN(mode='val',
                             transform=composed_transforms_ts,
                             fold=args.fold)

    trainloader = DataLoader(train_data,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=0)
    testloader = DataLoader(val_data,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0)

    num_iter_tr = len(trainloader)
    num_iter_ts = len(testloader)
    nitrs = args.resume_epoch * num_iter_tr
    nsamples = args.resume_epoch * len(train_data)
    print('nitrs: %d num_iter_tr: %d' % (nitrs, num_iter_tr))
    print('nsamples: %d tot_num_samples: %d' % (nsamples, len(train_data)))

    aveGrad = 0
    global_step = 0
    recent_losses = []
    start_t = time.time()

    best_f, cur_f = 0.0, 0.0
    for epoch in range(args.resume_epoch, args.nepochs):
        net.train()
        epoch_losses = []
        for ii, sample_batched in enumerate(trainloader):
            if 'trfe' in args.model_name or args.model_name == 'mtnet':
                nodules, glands = sample_batched
                inputs_n, labels_n = nodules['image'].cuda(), nodules['label'].cuda()
                inputs_g, labels_g = glands['image'].cuda(), glands['label'].cuda()
                inputs = torch.cat(
                    [inputs_n[0].unsqueeze(0), inputs_g[0].unsqueeze(0)],
                    dim=0)

                for i in range(1, inputs_n.size()[0]):
                    inputs = torch.cat([inputs, inputs_n[i].unsqueeze(0)],
                                       dim=0)
                    inputs = torch.cat([inputs, inputs_g[i].unsqueeze(0)],
                                       dim=0)

                global_step += inputs.data.shape[0]
                nodule, thyroid = net.forward(inputs)
                loss = 0
                for i in range(inputs.size()[0]):
                    if i % 2 == 0:
                        loss += criterion(nodule[i],
                                          labels_n[int(i / 2)],
                                          size_average=False,
                                          batch_average=True)
                    else:
                        loss += 0.5 * criterion(thyroid[i],
                                                labels_g[int((i - 1) / 2)],
                                                size_average=False,
                                                batch_average=True)

            else:
                inputs, labels = sample_batched['image'].cuda(), sample_batched['label'].cuda()
                global_step += inputs.data.shape[0]

                outputs = net.forward(inputs)
                loss = criterion(outputs,
                                 labels,
                                 size_average=False,
                                 batch_average=True)

            trainloss = loss.item()
            epoch_losses.append(trainloss)
            if len(recent_losses) < args.log_every:
                recent_losses.append(trainloss)
            else:
                recent_losses[nitrs % len(recent_losses)] = trainloss

            # Backpropagate; gradients accumulate until the next optimizer step
            loss.backward()
            aveGrad += 1
            nitrs += 1
            nsamples += args.batch_size

            # Update the weights once every args.naver_grad forward passes
            if aveGrad % args.naver_grad == 0:
                optimizer.step()
                optimizer.zero_grad()
                aveGrad = 0

            if nitrs % args.log_every == 0:
                meanloss = sum(recent_losses) / len(recent_losses)
                print('epoch: %d ii: %d trainloss: %.2f timecost:%.2f secs' %
                      (epoch, ii, meanloss, time.time() - start_t))
                writer.add_scalar('data/trainloss', meanloss, nsamples)

        meanloss = sum(epoch_losses) / len(epoch_losses)
        print('epoch: %d meanloss: %.2f' % (epoch, meanloss))
        writer.add_scalar('data/epochloss', meanloss, nsamples)

        if args.use_test == 1:
            prec_lists = []
            recall_lists = []
            sum_testloss = 0.0
            total_mae = 0.0
            cnt = 0
            count = 0
            iou = 0
            if args.use_eval == 1:
                net.eval()
            for ii, sample_batched in enumerate(testloader):
                inputs, labels = sample_batched['image'].cuda(), sample_batched['label'].cuda()
                with torch.no_grad():
                    if 'trfe' in args.model_name or args.model_name == 'mtnet':
                        outputs, _ = net.forward(inputs)
                    else:
                        outputs = net.forward(inputs)

                loss = criterion(outputs,
                                 labels,
                                 size_average=False,
                                 batch_average=True)
                sum_testloss += loss.item()

                predictions = torch.sigmoid(outputs)

                iou += utils.get_iou(predictions, labels)
                count += 1

                total_mae += utils.get_mae(predictions,
                                           labels) * predictions.size(0)
                prec_list, recall_list = utils.get_prec_recall(
                    predictions, labels)
                prec_lists.extend(prec_list)
                recall_lists.extend(recall_list)
                cnt += predictions.size(0)

                if ii % num_iter_ts == num_iter_ts - 1:
                    mmae = total_mae / cnt
                    mean_testloss = sum_testloss / num_iter_ts
                    mean_prec = sum(prec_lists) / len(prec_lists)
                    mean_recall = sum(recall_lists) / len(recall_lists)
                    fbeta = 1.3 * mean_prec * mean_recall / (0.3 * mean_prec +
                                                             mean_recall)
                    iou = iou / count

                    print('Validation:')
                    print(
                        'epoch: %d, numImages: %d testloss: %.2f mmae: %.4f fbeta: %.4f iou: %.4f'
                        % (epoch, cnt, mean_testloss, mmae, fbeta, iou))
                    writer.add_scalar('data/validloss', mean_testloss,
                                      nsamples)
                    writer.add_scalar('data/validmae', mmae, nsamples)
                    writer.add_scalar('data/validfbeta', fbeta, nsamples)
                    writer.add_scalar('data/validiou', iou, epoch)

                    cur_f = iou
                    if cur_f > best_f:
                        save_path = os.path.join(
                            save_dir, args.model_name + '_best' + '.pth')
                        torch.save(net.state_dict(), save_path)
                        print("Save model at {}\n".format(save_path))
                        best_f = cur_f

        if epoch % args.save_every == args.save_every - 1:
            save_path = os.path.join(
                save_dir, args.model_name + '_epoch-' + str(epoch) + '.pth')
            torch.save(net.state_dict(), save_path)
            print("Save model at {}\n".format(save_path))