Exemplo n.º 1
0
def test(cfg, writer, logger):
    """Evaluate a trained domain-adaptation model.

    Seeds every RNG from cfg['seed'], builds the dataset bundle and model,
    restores DeepLab weights from the checkpoint at cfg['test']['path'],
    then delegates to `validation`.
    """
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Dataset bundle: source_train / target_train / source_valid /
    # target_valid (plus the matching *_loader members).
    default_gpu = cfg['model']['default_gpu']
    if torch.cuda.is_available():
        device = torch.device("cuda:{}".format(default_gpu))
    else:
        device = torch.device('cpu')
    datasets = create_dataset(cfg, writer, logger)

    model = CustomModel(cfg, writer, logger)

    n_class = cfg['data']['target']['n_class']
    running_metrics_val = runningScore(n_class)
    source_running_metrics_val = runningScore(n_class)
    val_loss_meter = averageMeter()
    source_val_loss_meter = averageMeter()
    time_meter = averageMeter()
    loss_fn = get_loss_function(cfg)

    # Restore the DeepLab backbone weights from the test checkpoint.
    checkpoint = torch.load(cfg['test']['path'])
    model.adaptive_load_nets(model.BaseNet,
                             checkpoint['DeepLab']['model_state'])

    validation(model, logger, writer, datasets, device,
               running_metrics_val, val_loss_meter, loss_fn,
               source_val_loss_meter, source_running_metrics_val,
               iters=model.iter)
Exemplo n.º 2
0
 def __init__(self, numbers=19):
     """Track per-class recall statistics for threshold- and cluster-based
     pseudo-label selection.

     Args:
         numbers: number of semantic classes (default 19, Cityscapes).
     """
     self.class_numbers = numbers
     # BUG fix: these buffers were hard-coded to 19 entries and silently
     # ignored `numbers`; size them from the parameter so any class count
     # works (behavior is unchanged for the default of 19).
     self.classes_recall_thr = np.zeros([numbers, 3])
     self.classes_recall_thr_num = np.zeros([numbers])
     self.classes_recall_clu = np.zeros([numbers, 3])
     self.classes_recall_clu_num = np.zeros([numbers])
     self.running_metrics_val_threshold = runningScore(self.class_numbers)
     self.running_metrics_val_clusters = runningScore(self.class_numbers)
     # Per-class confidence threshold used for cluster-based selection.
     self.clu_threshold = np.full((numbers), 2.5)
Exemplo n.º 3
0
def train(data_loader,model,epoch,num_batch,learning_rate):
	"""Train a PSE-style text detector for one epoch (TensorFlow 2, eager).

	Args:
		data_loader: dataset handle consumed by pre_tools.batch_data.
		model: Keras model producing (batch, H, W, C) score maps.
		epoch: current epoch index, used only for the global step and logging.
		num_batch: number of batches in this epoch.
		learning_rate: fixed learning rate for the Adam optimizer.

	NOTE(review): relies on module-level `FLAGS`, `PM`, `pre_tools`,
	`cal_text_score`, `cal_kernel_score` and `runningScore`.
	"""
	####  Model One
	#resnet = PM.ResNet(PM.BottleBlock(), FLAGS.kernal_num, True, 0.5)
	#logites = resnet(tf_image)  ## (batch, 7, size, size)

	#Loss = PM.Dec_Loss_2(logites=logites, gt_texts=tf_gt, gt_kernels=tf_kernal, training_masks=tf_mask)

	# Binary (text / non-text) confusion-matrix trackers.
	running_metric_text = runningScore(2)
	running_metric_kernel = runningScore(2)

	'''
	learning_rate = tf.train.exponential_decay(
		learning_rate=FLAGS.learning_rate,
		global_step= step,
		decay_steps=num_batch * 200,
		decay_rate=0.5,
		staircase=True)
	'''


	#optim = tf.train.AdamOptimizer(learning_rate=learning_rate)
	optim = tf.optimizers.Adam(learning_rate=learning_rate)

	# Global step counter, carried across epochs for logging only.
	step = epoch * num_batch
	for i in range(num_batch):
		images, gt_texts, train_masks, kernal_images = pre_tools.batch_data(data_loader, i, FLAGS.batch_size)

		images = tf.convert_to_tensor(images, dtype=tf.float32)
		gt_texts = tf.convert_to_tensor(gt_texts, dtype=tf.float32)
		train_masks = tf.convert_to_tensor(train_masks, dtype=tf.float32)
		kernal_images = tf.convert_to_tensor(kernal_images, dtype=tf.float32)

		with tf.GradientTape() as tape:
			logites = model(images) ### (32,320,320,3)
			# NHWC -> NCHW so channel 0 is the text map, the rest kernels.
			logites = tf.transpose(logites, (0, 3, 1, 2)) ### (32,3,320,320)
			Loss = PM.Dec_Loss_2(logites=logites, gt_texts=gt_texts,
								 gt_kernels=kernal_images, training_masks=train_masks,kernal=FLAGS.kernal_num)

		# Compute gradients recorded by the tape and apply one Adam update.
		grads = tape.gradient(Loss, model.trainable_weights)
		optim.apply_gradients(zip(grads, model.trainable_weights))

		# Channel 0: full-text map; channels 1..: shrunk kernel maps.
		texts = logites[:, 0, :, :]
		kernels = logites[:, 1:, :, :]
		score_text = cal_text_score(texts, gt_texts, train_masks, running_metric_text)
		score_kernel = cal_kernel_score(kernels, kernal_images, gt_texts, train_masks, running_metric_kernel)
		acc = score_text['Mean Acc']
		iou_t = score_text['Mean IoU']
		iou_k = score_kernel['Mean IoU']
		step = step + 1

		if i % 20 == 0:
			information = '## Epoch:{:d}  Step_Train / Total_Batch: {:d} / {:d}  train_loss= {:5f}  train_acc= {:5f} IOU_t={:5f} IOU_k={:5f}'. \
				format(epoch,step, num_batch, Loss, acc,iou_t,iou_k)
			print(information)  ### print to screen
Exemplo n.º 4
0
 def __init__(self, numbers=19, modal_num=3, model=None):
     """Track per-class recall statistics for threshold- and cluster-based
     pseudo-label selection across multiple modalities.

     Args:
         numbers: number of semantic classes (default 19, Cityscapes).
         modal_num: number of input modalities; one extra row is kept for
             the fused output.
         model: passed through to CustomMetricsMultimodalMerger.
     """
     self.class_numbers = numbers
     # BUG fix: these buffers were hard-coded to 19 entries and silently
     # ignored `numbers`; size them from the parameter so any class count
     # works (behavior is unchanged for the default of 19).
     self.classes_recall_thr = np.zeros([numbers, 3])
     self.classes_recall_thr_num = np.zeros([numbers])
     self.classes_recall_clu = np.zeros([numbers, 3])
     self.classes_recall_clu_num = np.zeros([numbers])
     self.running_metrics_val_threshold = runningScore(self.class_numbers)
     self.running_metrics_val_clusters = runningScore(self.class_numbers)
     # One threshold row per modality plus one for the fused prediction.
     self.clu_threshold = torch.full((modal_num + 1, numbers), 3.0).cuda()
     self.multimodal_merger = CustomMetricsMultimodalMerger(
         modal_num=modal_num + 1, category_num=numbers, model=model
     )
Exemplo n.º 5
0
def boxplotvis(cfg):
    """Score saved predictions of three segmentation models on the val
    split and gather their per-class DSC values into one long-format
    DataFrame (one 'Method' label per model) for box-plot visualization.
    """
    loader_cls = get_loader(cfg["data"]["dataset"])
    v_loader = loader_cls(cfg["data"]["path"], split='val')

    n_classes = v_loader.n_classes
    n_val = len(v_loader.files['val'])

    # One running-score accumulator per compared model, keyed by the
    # sub-directory its predictions were saved under.
    compared = [
        ('vgg16_lstm_brainweb', runningScore(n_classes, n_val)),
        ('vgg16_gru_brainweb', runningScore(n_classes, n_val)),
        ('segnet_brainweb', runningScore(n_classes, n_val)),
    ]

    pred_root = cfg["data"]["pred_path"]
    with torch.no_grad():
        for i_val, (images_val, labels_val,
                    img_name_val) in tqdm(enumerate(v_loader)):
            gt = np.squeeze(labels_val.data.cpu().numpy())
            for subdir, metric in compared:
                # Predictions were dumped as .bmp images named after the
                # validation sample.
                pred = m.imread(pjoin(pred_root, subdir,
                                      img_name_val + '.bmp'))
                metric.update(gt, v_loader.encode_segmap(pred), i_val)

    # Convert each model's per-class DSC array to a DataFrame and tag it.
    frames = []
    for method, (_, metric) in zip(['VGG16-LSTM', 'VGG16-GRU', 'SegNet'],
                                   compared):
        _, dsc_cls = metric.get_list()
        frame = array2dataframe(dsc_cls)
        frame['Method'] = method
        frames.append(frame)
    data = pd.concat(frames)
Exemplo n.º 6
0
def val(args, model, dataloader):
    """Evaluate the segmentation and position heads of `model`.

    Returns:
        (segmiou, posmiou): the score dicts from each runningScore tracker.
    """
    seg_metric = runningScore(n_classes=args.num_classes)
    pos_metric = runningScore(n_classes=args.num_char)
    model.eval()
    with torch.no_grad():
        for data, seg, pos in dataloader:
            seg_pred, pos_pred = model(data)
            # argmax over the class axis turns logits into label maps
            seg_labels = np.argmax(seg_pred.cpu().numpy(), axis=1)
            seg_metric.update(seg.numpy(), seg_labels)
            pos_labels = np.argmax(pos_pred.cpu().numpy(), axis=1)
            pos_metric.update(pos.numpy(), pos_labels)
    segmiou = seg_metric.get_scores()
    posmiou = pos_metric.get_scores()
    print('segmiou:{}'.format(segmiou))
    print('posmiou:{}'.format(posmiou))
    return segmiou, posmiou
Exemplo n.º 7
0
def main(test_args):
    """Evaluate a trained SegNet on the CamVid test set.

    Loads weights from test_args.load_param, runs inference image by
    image, and prints overall scores, per-class IoU/accuracy, and the
    ground-truth pixel count of every class.
    """
    testset = "/mnt/iusers01/eee01/mchiwml4/CamVid/test"
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])
    test_dataset = DataLoader(
        Loaddata(testset,
                 transform=transform,
                 target_transform=MaskToTensor()),
        batch_size=1,
        shuffle=False,
        num_workers=8)

    label_num = 11
    model = segnetmodel.segnet(label_num)
    model = model.cuda()
    model.load_state_dict(torch.load(test_args.load_param))
    model.eval()

    # Ground-truth pixel count per class (tp + fn).
    total = np.zeros((label_num, ))
    running_metrics = runningScore(label_num)

    for inputs, labels in test_dataset:
        inputs = Variable(inputs.cuda())
        outputs = model(inputs)

        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.numpy()
        running_metrics.update(gt, pred)

        for cls in range(label_num):
            total[cls] += np.sum(gt == cls)

    score, class_iou, class_acc = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)
    print('class iou: ')
    for cls in range(label_num):
        print(cls, class_iou[cls])
    print('class acc: ')
    for cls in range(label_num):
        print(cls, class_acc[cls])

    print('number of pixels:')
    print(total)
Exemplo n.º 8
0
def test(opt, logger):
    """Evaluate a trained adaptation model on the target dataset.

    Seeds all RNGs from opt.seed, builds the dataset and model, restores
    weights from opt.resume_path, then runs `validation`.

    Raises:
        ValueError: if opt.model_name is not a supported architecture.
    """
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    ## create dataset
    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(opt, logger)

    if opt.model_name == 'deeplabv2':
        checkpoint = torch.load(opt.resume_path)['ResNet101']["model_state"]
        model = adaptation_modelv2.CustomModel(opt, logger)
        model.BaseNet.load_state_dict(checkpoint)
    else:
        # BUG fix: `model` was left unbound for any other model_name, which
        # crashed later with a confusing NameError at the validation call.
        # Fail fast with an explicit error instead.
        raise ValueError('unsupported model_name: {}'.format(opt.model_name))

    running_metrics_val = runningScore(opt.n_class)

    validation(model, logger, datasets, device, running_metrics_val)
Exemplo n.º 9
0
def validate(val_loader, model, criterion):
    """Run one validation epoch for a PSENet-style text detector.

    Channel 0 of the model output is the full-text score map; channels
    1..6 are progressively shrunk kernel maps. The loss is
    0.7 * text loss + 0.3 * mean kernel loss.

    Returns:
        (avg loss, text mean acc, kernel mean acc, text mIoU, kernel mIoU)
        as floats; the accuracy/IoU values come from the metrics returned
        on the last batch.

    NOTE(review): assumes `val_loader` yields at least one batch —
    otherwise `score_text`/`score_kernel` are unbound at return.
    """
    with torch.no_grad():
        model.eval()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        # Binary (text / non-text) confusion-matrix trackers.
        running_metric_text = runningScore(2)
        running_metric_kernel = runningScore(2)

        end = time.time()
        for batch_idx, (imgs, gt_texts, gt_kernels,
                        training_masks) in enumerate(val_loader):
            data_time.update(time.time() - end)

            imgs = Variable(imgs.cuda())
            gt_texts = Variable(gt_texts.cuda())
            gt_kernels = Variable(gt_kernels.cuda())
            training_masks = Variable(training_masks.cuda())

            outputs = model(imgs)
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]

            # OHEM mask selects hard negatives for the text loss.
            selected_masks = ohem_batch(texts, gt_texts, training_masks)
            selected_masks = Variable(selected_masks.cuda())

            loss_text = criterion(texts, gt_texts, selected_masks)

            loss_kernels = []
            # Kernel losses count only where the text head is confident
            # (sigmoid > 0.5) and the pixel is not masked out.
            mask0 = torch.sigmoid(texts).data.cpu().numpy()
            mask1 = training_masks.data.cpu().numpy()
            selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            selected_masks = torch.from_numpy(selected_masks).float()
            selected_masks = Variable(selected_masks.cuda())
            # Six shrunk kernels — assumes the model emits 7 channels;
            # confirm against the model definition.
            for i in range(6):
                kernel_i = kernels[:, i, :, :]
                gt_kernel_i = gt_kernels[:, i, :, :]
                loss_kernel_i = criterion(kernel_i, gt_kernel_i,
                                          selected_masks)
                loss_kernels.append(loss_kernel_i)
            loss_kernel = sum(loss_kernels) / len(loss_kernels)

            loss = 0.7 * loss_text + 0.3 * loss_kernel
            losses.update(loss.item(), imgs.size(0))

            score_text = cal_text_score(texts, gt_texts, training_masks,
                                        running_metric_text)
            score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                            training_masks,
                                            running_metric_kernel)

            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % 5 == 0:
                output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min '.format(
                    batch=batch_idx + 1,
                    size=len(val_loader),
                    bt=batch_time.avg,
                    total=batch_time.avg * batch_idx / 60.0,
                    eta=batch_time.avg * (len(val_loader) - batch_idx) / 60.0)
                print(output_log)
                sys.stdout.flush()

    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
Exemplo n.º 10
0
def train(args):
    """Train the fcn_mul brain-MRI segmentation network on one MRBrainS18
    cross-validation split (args.val_num), validating every epoch.

    Saves the best-Dice checkpoint as val_{n}_best.pkl, a snapshot every
    10 epochs, and qualitative/curve figures under ./fig_out/.

    NOTE(review): relies on the module-level timestamp `t_begin` and on
    helpers (MR18loader_CV, fcn_mul, cross_entropy2d, runningScore,
    adjust_learning_rate) defined elsewhere in this file.
    """
    os.environ["CUDA_VISIBLE_DEVICES"]=str(args.gpu_id)
    #torch.manual_seed(1337)
    print(args)
    # setup dataloader
    t_loader=MR18loader_CV(root=args.data_path,val_num=args.val_num,is_val=False,is_transform=True,is_flip=True,is_rotate=True,is_crop=True,is_histeq=True,forest=args.num_forest)
    v_loader=MR18loader_CV(root=args.data_path,val_num=args.val_num,is_val=True,is_transform=True,is_flip=False,is_rotate=False,is_crop=True,is_histeq=True,forest=args.num_forest)
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=1, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=1, num_workers=1,shuffle=True)
    # setup Metrics
    # Full label set, plus a 4-class (Back/CSF/GM/WM) tracker for testing.
    running_metrics_single = runningScore(n_classes)
    running_metrics_single_test = runningScore(4)
    # setup Model
    model=fcn_mul(n_classes=n_classes)
    vgg16 = models.vgg16(pretrained=True)
    model.init_vgg16_params(vgg16)
    model.cuda()
    # setup optimizer and loss
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4)
    loss_ce = cross_entropy2d
    #loss_ce_weight = weighted_loss
    #loss_dc = dice_loss
    #loss_hed= bce2d_hed
    # resume
    best_iou=-100.0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            best_iou=checkpoint['best_iou']
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {}), best_iou={}"
                  .format(args.resume, checkpoint['epoch'],best_iou))
        else:
            best_iou=-100.0
            print("No checkpoint found at '{}'".format(args.resume))
    # visualization
    t = []
    loss_seg_list=[]
    loss_hed_list=[]
    Dice_mean=[]
    Dice_CSF=[]
    Dice_GM=[]
    Dice_WM=[]
    t_pre=time.time()
    print('training prepared, cost {} seconds\n\n'.format(t_pre-t_begin))
    for epoch in range(args.n_epoch):
        t.append(epoch+1)
        model.train()
        adjust_learning_rate(optimizer,epoch)
        #loss_sum=0.0
        loss_epoch=0.0
        t_epoch=time.time()
        for i_train, (regions,T1s,IRs,T2s,lbls) in enumerate(trainloader):
            T1s=Variable(T1s.cuda())
            IRs,T2s=Variable(IRs.cuda()),Variable(T2s.cuda())
            # Keep only the central slice of the label stack as the target.
            lbls=Variable(lbls.cuda()[:,int(args.num_forest/2),:,:].unsqueeze(1))
            #edges=Variable(edges.cuda()[:,int(args.num_forest/2),:,:].unsqueeze(1))
            optimizer.zero_grad()
            outputs=model(T1s,IRs,T2s)
            seg_out=F.log_softmax(outputs,dim=1)
            # Negative max log-probability as a per-pixel uncertainty map
            # (used only for visualization below).
            max_prob,_=torch.max(seg_out,dim=1)
            max_prob=-max_prob.detach().unsqueeze(1)
            loss_seg_value=loss_ce(input=outputs,target=lbls) 
                    #+0.5*loss_dc(input=outputs,target=lbls)
                    #+0.5*loss_ce_weight(input=outputs,target=lbls,weight=max_prob)\
                    #+0.5*loss_ce_weight(input=outputs,target=lbls,weight=edges)\
            #loss_hed_value=loss_hed(input=outputs[1],target=edges)
                #+0.5*loss_hed(input=outputs[2],target=edges) \
                #+0.5*loss_hed(input=outputs[3],target=edges) \
                #+0.5*loss_hed(input=outputs[4],target=edges) \
                #+0.5*loss_hed(input=outputs[5],target=edges)
            loss=loss_seg_value
            #loss=loss_seg_value+loss_hed_value
            # loss average
            #loss_sum+=loss
            #if (i_train+1)%args.loss_avg==0:
            #    loss_sum/=args.loss_avg
            #    loss_sum.backward()
            #    optimizer.step()
            #    loss_sum=0.0
            loss.backward()
            optimizer.step()
            loss_epoch+=loss.item()
            # visualization
            # Qualitative panel for one fixed training batch (index 40).
            if i_train==40:
                ax1=plt.subplot(241)
                ax1.imshow(T1s[0,1,:,:].data.cpu().numpy(),cmap ='gray')
                ax1.set_title('train_img')
                ax1.axis('off')
                ax2=plt.subplot(242)
                ax2.imshow(t_loader.decode_segmap(lbls[0,0,:,:].data.cpu().numpy()).astype(np.uint8))
                ax2.set_title('train_label')
                ax2.axis('off')
                ax3=plt.subplot(243)
                model.eval()
                train_show=model(T1s,IRs,T2s)
                ax3.imshow(t_loader.decode_segmap(train_show[0].data.max(0)[1].cpu().numpy()).astype(np.uint8))
                ax3.set_title('train_predict')
                ax3.axis('off')
                ax4=plt.subplot(244)
                ax4.imshow(max_prob[0,0].cpu().numpy())
                ax4.set_title('uncertainty')
                ax4.axis('off')
                model.train()
        # NOTE(review): divides by the last batch index i_train, not the
        # batch count i_train+1 — the reported average is slightly off.
        loss_epoch/=i_train
        loss_seg_list.append(loss_epoch)
        loss_hed_list.append(0)
        t_train=time.time()
        print('epoch: ',epoch+1)
        print('--------------------------------Training--------------------------------')
        print('average loss in this epoch: ',loss_epoch)
        print('final loss in this epoch: ',loss.data.item())
        print('cost {} seconds up to now'.format(t_train-t_begin))
        print('cost {} seconds in this train epoch'.format(t_train-t_epoch))

        # Validation: predict each slice, pad back to 240x240, and score
        # both the full label set and the 3-tissue test mapping.
        model.eval()
        for i_val, (regions_val,T1s_val,IRs_val,T2s_val,lbls_val) in enumerate(valloader):
            T1s_val=Variable(T1s_val.cuda())
            IRs_val,T2s_val=Variable(IRs_val.cuda()),Variable(T2s_val.cuda())
            with torch.no_grad():
                outputs_single=model(T1s_val,IRs_val,T2s_val)[0,:,:,:]
            # get predict
            pred_single=outputs_single.data.max(0)[1].cpu().numpy()
            # pad to 240
            pred_pad=np.zeros((240,240),np.uint8)
            pred_pad[regions_val[0]:regions_val[1],regions_val[2]:regions_val[3]]=  \
                    pred_single[0:regions_val[1]-regions_val[0],0:regions_val[3]-regions_val[2]]
            # convert to 3 classes
            pred_single_test=np.zeros((240,240),np.uint8)
            pred_single_test=v_loader.lbl_totest(pred_pad)
            # get gt
            gt = lbls_val[0][int(args.num_forest/2)].numpy()
            # pad to 240
            gt_pad=np.zeros((240,240),np.uint8)
            gt_pad[regions_val[0]:regions_val[1],regions_val[2]:regions_val[3]]=  \
                    gt[0:regions_val[1]-regions_val[0],0:regions_val[3]-regions_val[2]]
            # convert to 3 classes
            gt_test=np.zeros((240,240),np.uint8)
            gt_test=v_loader.lbl_totest(gt_pad)
            # metrics update
            running_metrics_single.update(gt_pad, pred_pad)
            running_metrics_single_test.update(gt_test, pred_single_test)
            # visualization
            if i_val==40:
                ax5=plt.subplot(245)
                ax5.imshow((T1s_val[0,int(args.num_forest/2),:,:].data.cpu().numpy()*255+t_loader.T1mean).astype(np.uint8),cmap ='gray')
                ax5.set_title('src_img')
                ax5.axis('off')
                ax6=plt.subplot(246)
                ax6.imshow(t_loader.decode_segmap(gt).astype(np.uint8))
                ax6.set_title('gt')
                ax6.axis('off')
                ax7=plt.subplot(247)
                ax7.imshow(t_loader.decode_segmap(pred_single).astype(np.uint8))
                ax7.set_title('pred_single')
                ax7.axis('off')
                ax8=plt.subplot(248)
                ax8.imshow(pred_single_test[regions_val[0]:regions_val[1],regions_val[2]:regions_val[3]].astype(np.uint8))
                ax8.set_title('pred_single_test')
                ax8.axis('off')
                plt.tight_layout()
                plt.subplots_adjust(wspace=.1,hspace=.3)
                plt.savefig('./fig_out/val_{}_out_{}.png'.format(str(args.val_num),epoch+1))
        # compute dice coefficients during validation
        score_single, class_iou_single = running_metrics_single.get_scores()
        score_single_test, class_iou_single_test = running_metrics_single_test.get_scores()
        Dice_mean.append(score_single['Mean Dice : \t'])
        Dice_CSF.append(score_single_test['Dice : \t'][1])
        Dice_GM.append(score_single_test['Dice : \t'][2])
        Dice_WM.append(score_single_test['Dice : \t'][3])
        print('--------------------------------All tissues--------------------------------')
        print('Back: Background,')
        print('GM: Cortical GM(red), Basal ganglia(green),')
        print('WM: WM(yellow), WM lesions(blue),')
        print('CSF: CSF(pink), Ventricles(light blue),')
        print('Back: Cerebellum(white), Brainstem(dark red)')
        print('single predict: ')
        for k, v in score_single.items():
            print(k, v)
        print('--------------------------------Only tests--------------------------------')
        print('tissue : Back , CSF , GM , WM')
        print('single predict: ')
        for k, v in score_single_test.items():
            print(k, v)
        t_test=time.time()
        print('cost {} seconds up to now'.format(t_test-t_begin))
        print('cost {} seconds in this validation epoch'.format(t_test-t_train))
        # save model at best validation metrics
        if score_single['Mean Dice : \t'] >= best_iou:
            best_iou = score_single['Mean Dice : \t']
            state = {'epoch': epoch+1,
                     'model_state': model.state_dict(),
                     'optimizer_state' : optimizer.state_dict(),
                     'best_iou':best_iou}
            torch.save(state, "val_{}_best.pkl".format(str(args.val_num)))
            print('model saved!!!')
        # save model every 10 epochs
        if (epoch+1)%10==0:
            state = {'epoch': epoch+1,
                     'model_state': model.state_dict(),
                     'optimizer_state' : optimizer.state_dict(),
                     'score':score_single}
            torch.save(state, "val_{}_e_{}.pkl".format(str(args.val_num),epoch+1))
        # plot curve
        ax1=plt.subplot(211)
        ax1.plot(t,loss_seg_list,'g')
        ax1.plot(t,loss_hed_list,'r')
        ax1.set_title('train loss')
        ax2=plt.subplot(212)
        ax2.plot(t,Dice_mean,'k')
        ax2.plot(t,Dice_CSF,'r')
        ax2.plot(t,Dice_GM,'g')
        ax2.plot(t,Dice_WM,'b')
        ax2.set_title('validate Dice, R/G/B for CSF/GM/WM')
        plt.tight_layout()
        plt.subplots_adjust(wspace=0,hspace=.3)
        plt.savefig('./fig_out/val_{}_curve.png'.format(str(args.val_num)))
        # metric reset
        running_metrics_single.reset()
        running_metrics_single_test.reset()
        print('\n\n')
Exemplo n.º 11
0
def train(train_loader, model, criterion, optimizer, epoch):
    """Train a PSENet-style detector for one epoch (TensorFlow 2, eager).

    Channel 0 of the transposed output is the full-text map; channels
    1..6 are the shrunk kernel maps. The loss is
    0.7 * text loss + 0.3 * mean kernel loss.

    Returns:
        (avg loss, text mean acc, kernel mean acc, text mIoU, kernel mIoU)
        — accuracy/IoU values come from the last batch's scores.

    NOTE(review): relies on module-level `args` for batch_size; `epoch`
    is unused inside the body.
    """
    #model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # Binary (text / non-text) confusion-matrix trackers.
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks,
                    data_length) in enumerate(train_loader):

        with tf.GradientTape() as tape:

            data_time.update(time.time() - end)

            outputs = model(imgs)
            # NHWC -> NCHW so channel indexing below matches gt layout.
            outputs = tf.transpose(outputs, (0, 3, 1, 2))
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]

            # OHEM mask selects hard negatives for the text loss.
            selected_masks = ohem_batch(texts, gt_texts, training_masks)

            loss_text = criterion(texts, gt_texts, selected_masks)

            loss_kernels = []
            # Kernel losses count only where the text head is confident
            # (sigmoid > 0.5) and the pixel is not masked out.
            mask0 = tf.sigmoid(texts).numpy()
            mask1 = training_masks.numpy()
            selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            #selected_masks = torch.from_numpy(selected_masks).float()
            selected_masks = tf.convert_to_tensor(selected_masks,
                                                  dtype=tf.float32)
            #selected_masks = Variable(selected_masks.cuda())

            # Six shrunk kernels — assumes the model emits 7 channels.
            for i in range(6):
                kernel_i = kernels[:, i, :, :]
                gt_kernel_i = gt_kernels[:, i, :, :]
                loss_kernel_i = criterion(kernel_i, gt_kernel_i,
                                          selected_masks)
                loss_kernels.append(loss_kernel_i)
            loss_kernel = sum(loss_kernels) / len(loss_kernels)

            loss = 0.7 * loss_text + 0.3 * loss_kernel
        # Record the scalar loss (the backward pass happens via the tape).
        losses.update(loss.numpy(), imgs.shape[0])

        # Compute gradients recorded by the tape and apply one update.
        grads = tape.gradient(loss, model.trainable_weights)
        #
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()
        size = data_length / args.batch_size
        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min \
            | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f}\
             | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                #size=len(train_loader),
                size=data_length / args.batch_size,
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                #eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                eta=batch_time.avg * (size - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
Exemplo n.º 12
0
def train(cfg):
    """Config-driven segmentation training loop.

    Builds loaders, model, optimizer, scheduler and loss from `cfg`,
    optionally resumes from a checkpoint, trains for
    cfg['training']['train_iters'] iterations, validates periodically,
    and saves the best-mIoU checkpoint under ./checkpoint/.
    """
    
    # Setup seeds
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg['training'].get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['train_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        augmentations=data_aug)

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=cfg['training']['batch_size'], 
                                  num_workers=cfg['training']['n_workers'], 
                                  shuffle=True)

    valloader = data.DataLoader(v_loader, 
                                batch_size=cfg['training']['batch_size'], 
                                num_workers=cfg['training']['n_workers'])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    # Everything except the optimizer's name is forwarded as a kwarg.
    optimizer_params = {k:v for k, v in cfg['training']['optimizer'].items() 
                        if k != 'name'}

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    scheduler = get_scheduler(optimizer, cfg['training']['lr_schedule'])

    loss_fn = get_loss_function(cfg)

    start_iter = 0
    if cfg['training']['resume'] is not None:
        if os.path.isfile(cfg['training']['resume']):
 
            checkpoint = torch.load(cfg['training']['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            print("=====>",
                "Loaded checkpoint '{}' (iter {})".format(
                    cfg['training']['resume'], checkpoint["epoch"]
                )
            )
        else:
            print("=====>","No checkpoint found at '{}'".format(cfg['training']['resume']))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    # `flag` turns False once train_iters is reached, ending the outer loop.
    flag = True

    while i <= cfg['training']['train_iters'] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            # NOTE(review): scheduler.step() runs per-iteration before
            # optimizer.step(); check against the scheduler's expected
            # calling order for this PyTorch version.
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()
            
            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg['training']['print_interval'] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(i + 1,
                                           cfg['training']['train_iters'], 
                                           loss.item(),
                                           time_meter.avg / cfg['training']['batch_size'])

                print(print_str)
                time_meter.reset()

            # Periodic validation (and always at the final iteration).
            if (i + 1) % cfg['training']['val_interval'] == 0 or \
               (i + 1) == cfg['training']['train_iters']:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()


                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())


                print("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k,':',v)

                for k, v in class_iou.items():
                    print('{}: {}'.format(k, v))

                val_loss_meter.reset()
                running_metrics_val.reset()

                # Keep only the checkpoint with the best validation mIoU.
                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join('./checkpoint',
                                             "{}_{}_best_model.pkl".format(
                                                 cfg['model']['arch'],
                                                 cfg['data']['dataset']))
                    print("saving···")
                    torch.save(state, save_path)

            if (i + 1) == cfg['training']['train_iters']:
                flag = False
                break
    input = input.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c)
    target = target.view(-1)
    loss = F.cross_entropy(input,
                           target,
                           weight=weight,
                           size_average=size_average,
                           ignore_index=250)
    return loss


# optimier
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
#optimizer = Adam(model.parameters(), lr = 0.01)

# Setup Metrics
running_metrics_val = runningScore(n_classes)
val_loss_meter = averageMeter()

num_epochs = 30
step = 0
epoch = 0
if load_model_file is not None:
    step = start_step
    epoch = start_epoch

score_list = []
while epoch <= num_epochs:
    epoch += 1
    print("Starting epoch %s" % epoch)
    for (images, labels) in trainloader:
        step += 1
Exemplo n.º 14
0
def train(train_loader, model, criterion, optimizer, epoch,writer):
    """Run one training epoch for the text/kernel segmentation network.

    The model output layout (established by the slicing below) is:
    channel 0 = text map, channel 1 = kernel map, channels 2+ = similarity
    vectors used by the aggregation/discrimination losses.

    Args:
        train_loader: yields (imgs, gt_texts, gt_kernels, training_masks,
            gt_text_key, gt_kernels_key) batches.
        model: segmentation network; called as ``model(imgs)``.
        criterion: loss taking (prediction, ground truth, selected mask).
        optimizer: stepped once per batch.
        epoch (int): current epoch, used to build tensorboard global steps.
        writer: tensorboard SummaryWriter.

    Returns:
        Tuple of epoch averages: (loss, text acc, kernel acc, text IoU,
        kernel IoU, Lagg, Ldis).

    NOTE(review): if ``train_loader`` is empty, ``score_text`` /
    ``score_kernel`` are never assigned and the return raises NameError.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_Lagg = AverageMeter()
    losses_Ldis = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()

    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks,gt_text_key,gt_kernels_key) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda()) # batch_size*channel*w*h
        gt_texts = Variable(gt_texts.cuda())# batch_size*w*h
        gt_kernels = Variable(gt_kernels.cuda())# batch_size*1*w*h
        gt_text_key = Variable(gt_text_key.cuda())# batch_size*w*h
        gt_kernels_key = Variable(gt_kernels_key.cuda())# batch_size*w*h
        training_masks = Variable(training_masks.cuda())# batch_size*w*h

        outputs = model(imgs)

        if batch_idx % 20 == 0:
            # FIX: these debug snapshots were previously written on *every*
            # batch (two cv2.imwrite disk writes per iteration, always to the
            # same fixed filename).  They are debug artifacts, so dump them on
            # the same cadence as the tensorboard images below.
            ind = 'cat_34'
            cv2.imwrite('text'+str(ind)+'.jpg',torch.sigmoid(outputs[0, 0, :, :]).data.cpu().numpy().astype(np.uint8)*255)
            cv2.imwrite('kernel'+str(ind)+'.jpg',torch.sigmoid(outputs[0, 1, :, :]).data.cpu().numpy().astype(np.uint8)*255)
            writer.add_image('/data/ori_image', torchvision.utils.make_grid(imgs,nrow=8, padding=10,normalize=True).cpu(),0)
            writer.add_image('/data/label_text', torchvision.utils.make_grid(gt_texts.cpu().unsqueeze(1),nrow=8, padding=10,normalize=True),0)
            writer.add_image('/data/predict_text', torchvision.utils.make_grid(torch.sigmoid(outputs[:, 0, :, :]).unsqueeze(1),nrow=8, padding=10,normalize=True).cpu(),0)
            writer.add_image('/data/label_kernel', torchvision.utils.make_grid(gt_kernels,nrow=8, padding=10,normalize=True).cpu(),0)
            writer.add_image('/data/predict_kernel', torchvision.utils.make_grid(torch.sigmoid(outputs[:, 1, :, :]).unsqueeze(1),nrow=8, padding=10,normalize=True).cpu(),0)

        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:2, :, :]

        similarity_vector = outputs[:, 2:, :, :]  # raw (un-sigmoided) similarity channels

        # Text loss on OHEM-selected pixels.
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())

        loss_text = criterion(texts, gt_texts, selected_masks)

        # Kernel loss only where predicted text is confident AND the pixel
        # is inside the valid training mask.
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())

        loss_kernels = []
        for i in range(1):  # single kernel map (channel 1)
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # Aggregation / discrimination losses computed against ground-truth
        # instance keys (not predicted instances).
        loss_agg = cal_Lagg_gt(similarity_vector,gt_kernels_key,gt_text_key,training_masks)
        loss_ldis = cal_Ldis_gt(similarity_vector,gt_kernels_key,training_masks)

        loss = loss_text + 0.5*loss_kernel+0.25*(loss_agg+loss_ldis)

        # NOTE(review): 1000/8 looks like a hard-coded batches-per-epoch
        # estimate — confirm it matches the actual dataset/batch size.
        writer.add_scalar('Loss/total_loss',loss,batch_idx+epoch*(1000/8))
        writer.add_scalar('Loss/loss_text',loss_text,batch_idx+epoch*(1000/8))
        writer.add_scalar('Loss/loss_kernel',loss_kernel,batch_idx+epoch*(1000/8))
        writer.add_scalar('Loss/loss_agg',loss_agg,batch_idx+epoch*(1000/8))
        writer.add_scalar('Loss/loss_ldis',loss_ldis,batch_idx+epoch*(1000/8))

        losses.update(loss.item(), imgs.size(0))
        losses_Lagg.update(loss_agg.item(), imgs.size(0))
        losses_Ldis.update(loss_ldis.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log  = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f} | Lagg: {lagg:.4f} | Ldis: {ldis:.4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'],
                lagg=losses_Lagg.avg,
                ldis=losses_Ldis.avg
            )
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'], score_text['Mean IoU'], score_kernel['Mean IoU'],losses_Lagg.avg,losses_Ldis.avg)
Exemplo n.º 15
0
def train(data_path, models_path, backend, snapshot, crop_x, crop_y,
          batch_size, alpha, epochs, start_lr, milestones, gpu):
    """Train a PSPNet on Pascal VOC with a joint segmentation + presence loss.

    Args:
        data_path: root of the VOC dataset (``~`` expanded).
        models_path: directory where ``.pth`` checkpoints are written.
        backend: feature-extractor backbone name passed to build_network.
        snapshot: optional checkpoint to resume from.
        crop_x, crop_y: accepted for CLI compatibility (unused here).
        batch_size: DataLoader batch size.
        alpha: weight of the classification (presence) loss term.
        epochs: number of epochs to run from ``starting_epoch``.
        start_lr: initial Adam learning rate.
        milestones: comma-separated epoch list for the MultiStepLR decay.
        gpu: value for CUDA_VISIBLE_DEVICES.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    net, starting_epoch = build_network(snapshot, backend)
    data_path = os.path.abspath(os.path.expanduser(data_path))
    models_path = os.path.abspath(os.path.expanduser(models_path))
    os.makedirs(models_path, exist_ok=True)
    '''
        To follow this training routine you need a DataLoader that yields the tuples of the following format:
        (Bx3xHxW FloatTensor x, BxHxW LongTensor y, BxN LongTensor y_cls) where
        x - batch of input images,
        y - batch of ground truth seg maps,
        y_cls - batch of 1D tensors of dimensionality N: N total number of classes, 
        y_cls[i, T] = 1 if class T is present in image i, 0 otherwise
    '''

    voc_data = pascalVOCLoader(root=data_path,
                               is_transform=True,
                               augmentations=None)
    train_loader = DataLoader(voc_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)
    max_steps = len(voc_data)
    class_weights = None

    optimizer = optim.Adam(net.parameters(), lr=start_lr)
    scheduler = MultiStepLR(optimizer,
                            milestones=[int(x) for x in milestones.split(',')],
                            gamma=0.1)
    running_score = runningScore(21)  # 21 = 20 VOC classes + background

    # FIX: the criteria were rebuilt inside the epoch loop with identical
    # (constant) weights — construct them once.  nn.NLLLoss2d is deprecated
    # and removed in modern PyTorch; nn.NLLLoss handles 2-D targets
    # identically.
    seg_criterion = nn.NLLLoss(weight=class_weights)
    cls_criterion = nn.BCEWithLogitsLoss(weight=class_weights)

    for epoch in range(starting_epoch, starting_epoch + epochs):
        epoch_losses = []
        net.train()
        print('------------epoch[{}]----------'.format(epoch + 1))
        for i, (x, y, y_cls) in enumerate(train_loader):
            optimizer.zero_grad()
            x, y, y_cls = Variable(x).cuda(), Variable(y).cuda(), Variable(
                y_cls).float().cuda()
            out, out_cls = net(x)
            pred = out.data.max(1)[1].cpu().numpy()
            seg_loss, cls_loss = seg_criterion(out, y), cls_criterion(
                out_cls, y_cls)
            loss = seg_loss + alpha * cls_loss
            epoch_losses.append(loss.item())
            running_score.update(y.data.cpu().numpy(), pred)
            # Periodically report and reset metrics.
            # NOTE(review): 138 looks like a hard-coded batches-per-epoch
            # count — confirm against len(train_loader).
            if (i + 1) % 138 == 0:
                score, class_iou = running_score.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info('{}:{}'.format(k, v))
                running_score.reset()
            # NOTE(review): scheduler.get_lr() is deprecated in newer torch
            # (get_last_lr() is the replacement) — kept for compatibility.
            print_format_str = "Epoch[{}] batch[{}] loss = {:.4f} LR = {}"
            print_str = print_format_str.format(epoch + 1, i + 1, loss.item(),
                                                scheduler.get_lr()[0])
            print(print_str)
            logger.info(print_str)
            loss.backward()
            optimizer.step()

        scheduler.step()
        # Only checkpoint once past the warm-up epochs.
        if epoch + 1 > 20:
            train_loss = ('%.4f' % np.mean(epoch_losses))
            torch.save(
                net.state_dict(),
                os.path.join(
                    models_path,
                    '_'.join(["PSPNet", str(epoch + 1), train_loss]) + '.pth'))
Exemplo n.º 16
0
def train(cycle_num,
          dirs,
          path_to_net,
          plotter,
          batch_size=12,
          test_split=0.3,
          random_state=666,
          epochs=100,
          learning_rate=0.0001,
          momentum=0.9,
          num_folds=5,
          num_slices=155,
          n_classes=4):
    """
    Applies training on the network
        Args: 
            cycle_num (int): number of cycle in n-fold (num_folds) cross validation
            dirs (string): path to dataset subject directories 
            path_to_net (string): path to directory where to save network
            plotter (callable): visdom plotter
            batch_size - default (int): batch size
            test_split - default (float): percentage of test split 
            random_state - default (int): seed for k-fold cross validation
            epochs - default (int): number of epochs
            learning_rate - default (float): learning rate 
            momentum - default (float): momentum
            num_folds - default (int): number of folds in cross validation
            num_slices - default (int): number of slices per volume
            n_classes - default (int): number of classes (regions)
    """
    print('Setting started', flush=True)

    # Creating data indices
    # arange len of list of subject dirs
    indices = np.arange(len(glob.glob(dirs + '*')))
    test_indices, trainset_indices = get_test_indices(indices, test_split)
    # kfold index generator
    for cv_num, (train_indices, val_indices) in enumerate(
            get_train_cv_indices(trainset_indices, num_folds, random_state)):
        # splitted the 5-fold CV in 5 jobs: each invocation handles exactly
        # the fold whose index equals cycle_num and skips the others.
        if cv_num != int(cycle_num):
            continue

        net = U_Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        num_GPU = torch.cuda.device_count()
        if num_GPU > 1:
            print('Let us use {} GPUs!'.format(num_GPU), flush=True)
            net = nn.DataParallel(net)
        net.to(device)
        criterion = nn.CrossEntropyLoss()
        # Alternate the optimizer between jobs: even cycles use SGD with
        # momentum, odd cycles use Adam.
        # NOTE(review): this assumes cycle_num is an int (int(cycle_num)
        # above suggests it may arrive as a string, which would make the
        # modulo below raise) — confirm at the call site.
        if cycle_num % 2 == 0:
            optimizer = optim.SGD(net.parameters(),
                                  lr=learning_rate,
                                  momentum=momentum)
        else:
            optimizer = optim.Adam(net.parameters(), lr=learning_rate)

        # LR is reduced whenever the epoch-level validation loss plateaus.
        scheduler = ReduceLROnPlateau(optimizer, threshold=1e-6, patience=0)

        print('cv cycle number: ', cycle_num, flush=True)
        start = time.time()
        print('Start Train and Val loading', flush=True)

        MRIDataset_train = dataset.MRIDataset(dirs, train_indices)

        MRIDataset_val = dataset.MRIDataset(dirs, val_indices)

        datalengths = {
            'train': len(MRIDataset_train),
            'val': len(MRIDataset_val)
        }
        dataloaders = {
            'train': get_dataloader(MRIDataset_train, batch_size, num_GPU),
            'val': get_dataloader(MRIDataset_val, batch_size, num_GPU)
        }
        print('Train and Val loading took: ', time.time() - start, flush=True)
        # make loss and acc history for train and val separatly
        # Setup Metrics
        running_metrics_val = runningScore(n_classes)
        running_metrics_train = runningScore(n_classes)
        val_loss_meter = averageMeter()
        train_loss_meter = averageMeter()
        itr = 0  # global plot step, shared by train and val phases
        iou_best = 0.
        for epoch in tqdm(range(epochs), desc='Epochs'):
            print('Epoch: ', epoch + 1, flush=True)
            phase = 'train'
            print('Phase: ', phase, flush=True)
            start = time.time()
            # Set model to training mode
            net.train()
            # Iterate over data.
            for i, data in tqdm(enumerate(dataloaders[phase]),
                                desc='Data Iteration ' + phase):
                if (i + 1) % 100 == 0:
                    print('Number of Iteration [{}/{}]'.format(
                        i + 1, int(datalengths[phase] / batch_size)),
                          flush=True)
                # get the inputs
                inputs = data['mri_data'].to(device)
                GT = data['seg'].to(device)
                subject_slice_path = data['subject_slice_path']
                # Clear all accumulated gradients
                optimizer.zero_grad()
                # Predict classes using inputs from the train set
                SR = net(inputs)
                # Compute the loss based on the predictions and
                # actual segmentation
                loss = criterion(SR, GT)
                # Backpropagate the loss
                loss.backward()
                # Adjust parameters according to the computed
                # gradients
                # -- weight update
                optimizer.step()
                # Track and plot metrics and loss, and save network
                predictions = SR.data.max(1)[1].cpu().numpy()
                GT_cpu = GT.data.cpu().numpy()
                running_metrics_train.update(GT_cpu, predictions)
                train_loss_meter.update(loss.item(), n=1)
                # Every 100 batches: push running metrics to visdom.
                if (i + 1) % 100 == 0:
                    itr += 1
                    score, class_iou = running_metrics_train.get_scores()
                    for k, v in score.items():
                        plotter.plot(k, 'itr', phase, k, itr, v)
                    for k, v in class_iou.items():
                        print('Class {} IoU: {}'.format(k, v), flush=True)
                        plotter.plot(
                            str(k) + ' Class IoU', 'itr', phase,
                            str(k) + ' Class IoU', itr, v)
                    print('Loss Train', train_loss_meter.avg, flush=True)
                    plotter.plot('Loss', 'itr', phase, 'Loss Train', itr,
                                 train_loss_meter.avg)
            print('Phase {} took {} s for whole {}set!'.format(
                phase,
                time.time() - start, phase),
                  flush=True)

            # Validation Phase
            phase = 'val'
            print('Phase: ', phase, flush=True)
            start = time.time()
            # Set model to evaluation mode
            net.eval()
            start = time.time()
            with torch.no_grad():
                # Iterate over data.
                for i, data in tqdm(enumerate(dataloaders[phase]),
                                    desc='Data Iteration ' + phase):
                    if (i + 1) % 100 == 0:
                        print('Number of Iteration [{}/{}]'.format(
                            i + 1, int(datalengths[phase] / batch_size)),
                              flush=True)
                    # get the inputs
                    inputs = data['mri_data'].to(device)
                    GT = data['seg'].to(device)
                    subject_slice_path = data['subject_slice_path']
                    # NOTE(review): zero_grad here is a no-op leftover — no
                    # backward pass happens inside this torch.no_grad() block.
                    optimizer.zero_grad()
                    # Predict classes using inputs from the train set
                    SR = net(inputs)
                    # Compute the loss based on the predictions and
                    # actual segmentation
                    loss = criterion(SR, GT)
                    # Track and plot metrics and loss
                    predictions = SR.data.max(1)[1].cpu().numpy()
                    GT_cpu = GT.data.cpu().numpy()
                    running_metrics_val.update(GT_cpu, predictions)
                    val_loss_meter.update(loss.item(), n=1)
                    if (i + 1) % 100 == 0:
                        itr += 1
                        score, class_iou = running_metrics_val.get_scores()
                        for k, v in score.items():
                            plotter.plot(k, 'itr', phase, k, itr, v)
                        for k, v in class_iou.items():
                            print('Class {} IoU: {}'.format(k, v), flush=True)
                            plotter.plot(
                                str(k) + ' Class IoU', 'itr', phase,
                                str(k) + ' Class IoU', itr, v)
                        print('Loss Val', val_loss_meter.avg, flush=True)
                        plotter.plot('Loss ', 'itr', phase, 'Loss Val', itr,
                                     val_loss_meter.avg)
                # Checkpoint every 10th epoch if Mean IoU improved.
                # NOTE(review): `score` is whatever the most recent
                # get_scores() call produced above; if neither phase ever
                # reached a (i + 1) % 100 == 0 iteration, this raises
                # NameError — confirm the loaders always have >= 100 batches.
                if (epoch + 1) % 10 == 0:
                    if score['Mean IoU'] > iou_best:
                        save_net(path_to_net, batch_size, epoch, cycle_num,
                                 train_indices, val_indices, test_indices, net,
                                 optimizer)
                        iou_best = score['Mean IoU']
                    save_output(epoch, path_to_net, subject_slice_path,
                                SR.data.cpu().numpy(), GT_cpu)
                print('Phase {} took {} s for whole {}set!'.format(
                    phase,
                    time.time() - start, phase),
                      flush=True)
            # Call the learning rate adjustment function after every epoch
            scheduler.step(val_loss_meter.avg)
    # save network after training
    save_net(path_to_net,
             batch_size,
             epochs,
             cycle_num,
             train_indices,
             val_indices,
             test_indices,
             net,
             optimizer,
             iter_num=None)
Exemplo n.º 17
0
def train(cfg, logger):
    """Train a segmentation model driven entirely by a ``cfg`` dict.

    Seeds RNGs, builds train/val loaders, optimizer, scheduler and loss from
    the config, optionally resumes from a checkpoint, then runs an
    iteration-based training loop with periodic validation and best-Dice
    checkpointing.

    Args:
        cfg (dict): configuration with ``data``, ``model`` and ``training``
            sections (see the key accesses below for the expected schema).
        logger: logger used for progress and metric reporting.
    """
    # Setup Seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup Device
    device = torch.device("cuda:{}".format(cfg["training"]["gpu_idx"])
                          if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg["training"].get("augmentations", None)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    t_loader = data_loader(
        data_path,
        split=cfg["data"]["train_split"],
    )

    v_loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
    )

    n_classes = t_loader.n_classes
    n_val = len(v_loader.files['val'])

    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        shuffle=True,
    )

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=cfg["training"]["n_workers"])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes, n_val)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=[cfg["training"]["gpu_idx"]])

    # Setup Optimizer, lr_scheduler and Loss Function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    # FIX: start_iter must be initialised *before* the resume branch.
    # Previously it was set from the checkpoint and then unconditionally
    # reset to 0 below, so a resumed run silently restarted its iteration
    # counter (and hence its LR/validation schedule) from scratch.
    start_iter = 0

    # Resume Trained Model
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    # Start Training
    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_dice = -100.0
    i = start_iter
    flag = True

    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels, img_name) in trainloader:
            i += 1
            start_ts = time.time()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()
            # FIX: scheduler.step() belongs *after* optimizer.step()
            # (required since PyTorch 1.1); calling it first skips the very
            # first learning-rate value of the schedule.
            scheduler.step()

            time_meter.update(time.time() - start_ts)

            # print train loss
            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )

                print(print_str)
                logger.info(print_str)
                time_meter.reset()

            # validation
            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val,
                                img_name_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred, i_val)
                        val_loss_meter.update(val_loss.item())

                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                # print val metrics
                score, class_dice = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))

                for k, v in class_dice.items():
                    logger.info("{}: {}".format(k, v))

                val_loss_meter.reset()
                running_metrics_val.reset()

                # save model whenever the mean Dice improves
                if score["Dice : \t"] >= best_dice:
                    best_dice = score["Dice : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_dice": best_dice,
                    }
                    save_path = os.path.join(
                        cfg["training"]["model_dir"],
                        "{}_{}.pkl".format(cfg["model"]["arch"],
                                           cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
Exemplo n.º 18
0
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch of the text/kernel segmentation network.

    Channel 0 of the model output is the text map; the remaining channels
    are shrunken kernel maps.  Text loss uses OHEM-selected pixels; kernel
    losses are restricted to confidently-predicted, unmasked text pixels.

    Returns:
        (avg loss, text acc, kernel acc, text IoU, kernel IoU) for the epoch.
    """
    model.train()

    timer_batch = AverageMeter()
    timer_data = AverageMeter()
    loss_meter = AverageMeter()
    metric_text = runningScore(2)
    metric_kernel = runningScore(2)

    tic = time.time()
    for step, (imgs, gt_texts, gt_kernels,
               training_masks) in enumerate(train_loader):
        timer_data.update(time.time() - tic)

        imgs = imgs.to(device)
        gt_texts = gt_texts.to(device)
        gt_kernels = gt_kernels.to(device)
        training_masks = training_masks.to(device)

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # Text loss on hard-example-mined pixels.
        ohem_masks = ohem_batch(texts, gt_texts, training_masks).to(device)
        loss_text = criterion(texts, gt_texts, ohem_masks)

        # Kernel supervision only where the predicted text is confident
        # and the pixel is inside the valid training mask.
        text_prob = torch.sigmoid(texts).data.cpu().numpy()
        valid = training_masks.data.cpu().numpy()
        kernel_masks = torch.from_numpy(
            ((text_prob > 0.5) & (valid > 0.5)).astype('float32')).float()
        kernel_masks = kernel_masks.to(device)
        per_kernel_losses = [
            criterion(kernels[:, k, :, :], gt_kernels[:, k, :, :],
                      kernel_masks) for k in range(args.kernelnum - 1)
        ]
        loss_kernel = sum(per_kernel_losses) / len(per_kernel_losses)

        loss = 0.7 * loss_text + 0.3 * loss_kernel
        loss_meter.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, metric_kernel)

        timer_batch.update(time.time() - tic)
        tic = time.time()

        # Periodic progress log + rolling checkpoint.
        if step % 20 == 0:
            output_log = '[{epoch}/{allepoch}][{batch}/{size}] Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                epoch=epoch,
                allepoch=args.n_epoch,
                batch=step + 1,
                size=len(train_loader),
                bt=timer_batch.avg,
                total=timer_batch.avg * step / 60.0,
                eta=timer_batch.avg * (len(train_loader) - step) / 60.0,
                loss=loss_meter.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()
            torch.save(model.state_dict(), 'training_model')

    return (loss_meter.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
Exemplo n.º 19
0
def test(cfg):
    """Evaluate a trained segmentation model on the validation split.

    Loads the checkpoint named in ``cfg["testing"]["trained_model"]``, runs
    inference over the val set, accumulates per-class Dice/accuracy metrics,
    saves each decoded prediction as a ``.bmp``, and optionally renders a
    per-class Dice boxplot.

    Args:
        cfg (dict): configuration with ``data``, ``model``, ``training`` and
            ``testing`` sections (see key accesses below for the schema).
    """
    device = torch.device("cuda:{}".format(cfg["training"]["gpu_idx"])
                          if torch.cuda.is_available() else "cpu")

    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    v_loader = data_loader(data_path, split='val')

    n_classes = v_loader.n_classes
    n_val = len(v_loader.files['val'])
    valLoader = data.DataLoader(v_loader,
                                batch_size=1,
                                num_workers=cfg["training"]["n_workers"])

    # Build the network and restore the trained weights.
    model = get_model(cfg["model"], n_classes).to(device)
    state = convert_state_dict(
        torch.load(cfg["testing"]["trained_model"],
                   map_location=device)["model_state"])
    model.load_state_dict(state)
    # FIX: dropped the redundant second model.to(device) — the model was
    # already moved to `device` at construction above.
    model.eval()

    running_metrics_val = runningScore(n_classes, n_val)
    with torch.no_grad():
        for i_val, (images_val, labels_val,
                    img_name_val) in tqdm(enumerate(valLoader)):
            images_val = images_val.to(device)
            labels_val = labels_val.to(device)

            outputs = model(images_val)

            # batch_size is 1, so squeeze away the batch dimension.
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy())
            gt = np.squeeze(labels_val.data.cpu().numpy())

            running_metrics_val.update(gt, pred, i_val)

            # Save the colorized prediction for visual inspection.
            decoded = v_loader.decode_segmap(pred, plot=False)
            m.imsave(
                pjoin(cfg["testing"]["path"],
                      '{}.bmp'.format(img_name_val[0])), decoded)

    score = running_metrics_val.get_scores()
    acc_all, dsc_cls = running_metrics_val.get_list()
    for k, v in score[0].items():
        print(k, v)

    # FIX: truthiness test instead of the `== True` comparison (PEP 8).
    if cfg["testing"]["boxplot"]:
        sns.set_style("whitegrid")
        labels = ['CSF', 'Gray Matter', 'White Matter']
        fig1, ax1 = plt.subplots()
        ax1.set_title('Basic Plot')
        # Class 0 (background) is excluded from the boxplot.
        ax1 = sns.boxplot(data=dsc_cls.transpose()[:, 1:n_classes])

        ax1.set_xlabel('Three separate samples')
        ax1.set_ylabel('Dice Score')

        # NOTE(review): hard-coded output path — consider moving into cfg.
        plt.savefig('/home/jwliu/disk/kxie/CNN_LSTM/test_results/box.pdf')
Exemplo n.º 20
0
def train(cfg, writer, logger):
    """Multi-modal domain-adaptation training loop.

    Trains a ``CustomModel`` on a source domain (GTA5 renderings adapted to
    three city styles) against up to three target modalities (clean / foggy /
    rainy Cityscapes), logging losses to TensorBoard and running validation
    at a fixed iteration interval.

    Args:
        cfg: configuration dict with ``data``/``model``/``training`` sections.
        writer: TensorBoard ``SummaryWriter``.
        logger: ``logging.Logger`` instance.
    """
    # Fix every RNG so runs are reproducible.
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))
    ## create dataset
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(cfg, writer, logger)

    use_pseudo_label = False
    model = CustomModel(cfg, writer, logger, use_pseudo_label, modal_num=3)

    # Setup Metrics
    running_metrics_val = runningScore(cfg['data']['target']['n_class'])
    source_running_metrics_val = runningScore(cfg['data']['target']['n_class'])
    val_loss_meter = averageMeter()
    source_val_loss_meter = averageMeter()
    time_meter = averageMeter()
    loss_fn = get_loss_function(cfg)
    flag_train = True  # cleared when the iteration budget is exhausted

    epoches = cfg['training']['epoches']

    source_train_loader = datasets.source_train_loader
    target_train_loader = datasets.target_train_loader

    logger.info('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    print('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    logger.info('target train batchsize is {}'.format(
        target_train_loader.batch_size))
    print('target train batchsize is {}'.format(
        target_train_loader.batch_size))

    val_loader = None
    if cfg.get('valset') == 'gta5':
        val_loader = datasets.source_valid_loader
        logger.info('valset is gta5')
        print('valset is gta5')
    else:
        val_loader = datasets.target_valid_loader
        logger.info('valset is cityscapes')
        print('valset is cityscapes')
    logger.info('val batchsize is {}'.format(val_loader.batch_size))
    print('val batchsize is {}'.format(val_loader.batch_size))

    # load category anchors
    """
    objective_vectors = torch.load('category_anchors')
    model.objective_vectors = objective_vectors['objective_vectors']
    model.objective_vectors_num = objective_vectors['objective_num']
    """

    # begin training
    model.iter = 0
    for epoch in range(epoches):
        if not flag_train:
            break
        if model.iter > cfg['training']['train_iters']:
            break

        if use_pseudo_label:
            # Monitor accuracy/recall of CAG-based and probability-based
            # pseudo-label assignment from the previous epoch.
            score_cl, _ = model.metrics.running_metrics_val_clusters.get_scores(
            )
            print('clus_IoU: {}'.format(score_cl["Mean IoU : \t"]))

            logger.info('clus_IoU: {}'.format(score_cl["Mean IoU : \t"]))
            logger.info('clus_Recall: {}'.format(
                model.metrics.calc_mean_Clu_recall()))
            logger.info(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 1])
            logger.info('clus_Acc: {}'.format(
                np.mean(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 1])))
            logger.info(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 2])

            score_cl, _ = model.metrics.running_metrics_val_threshold.get_scores(
            )
            logger.info('thr_IoU: {}'.format(score_cl["Mean IoU : \t"]))
            logger.info('thr_Recall: {}'.format(
                model.metrics.calc_mean_Thr_recall()))
            logger.info(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 1])
            logger.info('thr_Acc: {}'.format(
                np.mean(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 1])))
            logger.info(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 2])
        model.metrics.reset()

        # One pass over the target loader == one epoch; a matching source
        # batch is pulled from the (cycling) source loader each iteration.
        for (target_image, target_label,
             target_img_name) in datasets.target_train_loader:
            model.iter += 1
            i = model.iter
            if i > cfg['training']['train_iters']:
                break
            # load source data
            images, labels, source_img_name = datasets.source_train_loader.next(
            )
            start_ts = time.time()
            images = images.to(device)
            labels = labels.to(device)
            # load target data
            target_image = target_image.to(device)
            target_label = target_label.to(device)
            #model.scheduler_step()
            model.train(logger=logger)
            if cfg['training'].get('freeze_bn') == True:
                model.freeze_bn_apply()
            model.optimizer_zerograd()
            # Map each image to its modality id (0: clean, 1: foggy, 2: rain)
            # based on the directory name embedded in the sample path.
            source_modal_ids = []
            for _img_name in source_img_name:
                if 'gtav2cityscapes' in _img_name:
                    source_modal_ids.append(0)
                elif 'gtav2cityfoggy' in _img_name:
                    source_modal_ids.append(1)
                elif 'gtav2cityrain' in _img_name:
                    source_modal_ids.append(2)
                else:
                    # BUGFIX: previous message omitted gtav2cityrain.
                    assert False, "[ERROR] unknown image source, expected gtav2cityscapes, gtav2cityfoggy or gtav2cityrain!"

            target_modal_ids = []
            for _img_name in target_img_name:
                if 'Cityscapes_foggy' in _img_name:
                    target_modal_ids.append(1)
                elif 'Cityscapes_rain' in _img_name:
                    target_modal_ids.append(2)
                else:
                    target_modal_ids.append(0)

            loss, loss_cls_L2, loss_pseudo = model.step(
                images, labels, source_modal_ids, target_image, target_label,
                target_modal_ids, use_pseudo_label)
            # scheduler step
            model.scheduler_step()
            if loss_cls_L2 > 10:
                logger.info('loss_cls_l2 abnormal!!')

            time_meter.update(time.time() - start_ts)
            if (i + 1) % cfg['training']['print_interval'] == 0:
                unchanged_cls_num = 0
                if use_pseudo_label:
                    fmt_str = "Epoches [{:d}/{:d}] Iter [{:d}/{:d}]  Loss: {:.4f}  Loss_L2: {:.4f}  Loss_pseudo: {:.4f}  Time/Image: {:.4f}"
                else:
                    fmt_str = "Epoches [{:d}/{:d}] Iter [{:d}/{:d}]  Loss_GTA: {:.4f}  Loss_adv: {:.4f}  Loss_D: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    epoch + 1, epoches, i + 1, cfg['training']['train_iters'],
                    loss.item(), loss_cls_L2.item(), loss_pseudo.item(),
                    time_meter.avg / cfg['data']['source']['batch_size'])

                print(print_str)
                logger.info(print_str)
                logger.info(
                    'unchanged number of objective class vector: {}'.format(
                        unchanged_cls_num))
                if use_pseudo_label:
                    loss_names = [
                        'train_loss', 'train_L2Loss', 'train_pseudoLoss'
                    ]
                else:
                    loss_names = [
                        'train_loss_GTA', 'train_loss_adv', 'train_loss_D'
                    ]
                writer.add_scalar('loss/{}'.format(loss_names[0]), loss.item(),
                                  i + 1)
                writer.add_scalar('loss/{}'.format(loss_names[1]),
                                  loss_cls_L2.item(), i + 1)
                writer.add_scalar('loss/{}'.format(loss_names[2]),
                                  loss_pseudo.item(), i + 1)
                time_meter.reset()

                if use_pseudo_label:
                    score_cl, _ = model.metrics.running_metrics_val_clusters.get_scores(
                    )
                    logger.info('clus_IoU: {}'.format(
                        score_cl["Mean IoU : \t"]))
                    logger.info('clus_Recall: {}'.format(
                        model.metrics.calc_mean_Clu_recall()))
                    logger.info('clus_Acc: {}'.format(
                        np.mean(model.metrics.classes_recall_clu[:, 0] /
                                model.metrics.classes_recall_clu[:, 2])))

                    score_cl, _ = model.metrics.running_metrics_val_threshold.get_scores(
                    )
                    logger.info('thr_IoU: {}'.format(
                        score_cl["Mean IoU : \t"]))
                    logger.info('thr_Recall: {}'.format(
                        model.metrics.calc_mean_Thr_recall()))
                    logger.info('thr_Acc: {}'.format(
                        np.mean(model.metrics.classes_recall_thr[:, 0] /
                                model.metrics.classes_recall_thr[:, 2])))

            # evaluation
            if (i + 1) % cfg['training']['val_interval'] == 0 or \
                (i + 1) == cfg['training']['train_iters']:
                validation(
                    model, logger, writer, datasets, device, running_metrics_val, val_loss_meter, loss_fn,\
                    source_val_loss_meter, source_running_metrics_val, iters = model.iter
                    )
                torch.cuda.empty_cache()
                logger.info('Best iou until now is {}'.format(model.best_iou))
            if (i + 1) == cfg['training']['train_iters']:
                # BUGFIX: was `flag = False`, a dead local assignment — the
                # epoch loop tests `flag_train`, so the early exit never fired.
                flag_train = False
                break
Exemplo n.º 21
0
def main(test_args):
    """Evaluate a Deeplab-ResNet model on the CamVid test split.

    Loads weights from ``test_args.load_param``, optionally refines the
    softmax output with a dense CRF when ``test_args.crf`` is set, then
    prints overall scores, per-class IoU/accuracy and per-class ground-truth
    pixel counts.
    """
    testset = "/mnt/iusers01/eee01/mchiwml4/CamVid/test"
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])
    test_dataset = DataLoader(Loaddata(testset,
                                       transform=transform,
                                       target_transform=MaskToTensor()),
                              batch_size=1,
                              shuffle=False,
                              num_workers=8)

    label_num = 11
    model = deeplab_v2.Deeplab_Resnet(label_num).cuda()
    model.load_state_dict(torch.load(test_args.load_param))
    model.eval()

    # Per-class ground-truth pixel counts (tp + fn per class).
    total = np.zeros((label_num, ))

    running_metrics = runningScore(label_num)

    for batch in test_dataset:
        inputs, labels = batch
        inputs = Variable(inputs.cuda())

        outputs = model(inputs)

        # Upsample the final head's logits back to the input resolution.
        height = inputs.size()[2]
        width = inputs.size()[3]
        interp_resize = nn.Upsample(size=(int(height), int(width)),
                                    mode='bilinear')
        probs = F.softmax(interp_resize(outputs[3]), dim=1)
        probs = probs.data.cpu().numpy()

        if test_args.crf:
            # Refine each probability map with a dense CRF over the raw image.
            refined = np.zeros(probs.shape)
            raw_images = inputs.data.cpu().numpy().astype(np.uint8)
            for idx, (image, prob_map) in enumerate(zip(raw_images, probs)):
                refined[idx] = dense_crf(image.transpose(1, 2, 0), prob_map)
            probs = refined

        pred = np.argmax(probs, axis=1)
        gt = labels.numpy()

        running_metrics.update(gt, pred)

        for cls in range(label_num):
            total[cls] += np.sum(gt == cls)

    score, class_iou, class_acc = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)
    print('class iou: ')
    for cls in range(label_num):
        print(cls, class_iou[cls])
    print('class acc: ')
    for cls in range(label_num):
        print(cls, class_acc[cls])

    print('number of pixels:')
    print(total)
Exemplo n.º 22
0
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch of a PSENet-style text detector.

    Channel 0 of the model output is the full-text score map; the remaining
    channels are progressively shrunk kernel maps. Text loss uses OHEM-selected
    pixels; kernel losses are restricted to confidently-text pixels.

    Returns:
        (avg_loss, text_mean_acc, kernel_mean_acc, text_mean_iou,
         kernel_mean_iou) for the epoch.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels,
                    training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]  # full-text score map
        kernels = outputs[:, 1:, :, :]  # shrunk kernel maps

        # Online hard example mining for the text loss.
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())

        loss_text = criterion(texts, gt_texts, selected_masks)  #dice loss
        #loss_text = (F.binary_cross_entropy_with_logits(texts, gt_texts, reduce=False) * selected_masks).mean()#bce loss
        loss_kernels = []
        # Kernel losses only count pixels the text head already predicts as
        # text (sigmoid > 0.5) inside the valid training mask.
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        # GENERALIZED: iterate over however many kernel channels the model
        # emits instead of the previous hard-coded 6.
        for i in range(kernels.size(1)):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # Weighted combination from the PSENet paper.
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
Exemplo n.º 23
0
# Evaluate a trained FCN_Dilated road-segmentation model on the KITTI val
# split and save prediction / ground-truth images side by side.
net = FCN_Dilated()
net.load_state_dict(torch.load('best_seg_dcgan.wts'))
net.to(device)

net.eval()
dst = kittiLoader(
    '/home-local/rohitrishabh/utilScripts/Segmentation/FCN_Dilated/data_road/',
    split="val")
valloader = data.DataLoader(dst,
                            batch_size=1,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)

score = runningScore(2)

# BUGFIX: loop variable renamed from `data`, which shadowed the
# `torch.utils.data` module used to build the DataLoader above.
for i, batch in enumerate(valloader):
    imgs, labels = batch
    imgs, labels = imgs.to(device), labels.to(device)

    with torch.no_grad():
        out = net(imgs)

    pred = out.data.max(1)[1].cpu().numpy()
    plt.imshow(pred[0])
    plt.savefig('output_test/' + str(i) + '_out.png')
    # BUGFIX: bring labels back to host memory before plotting — matplotlib
    # cannot consume CUDA tensors.
    plt.imshow(labels[0].cpu())
    plt.savefig('output_test/' + str(i) + '_orig.png')
    print(np.sum(pred))
    gt = labels.data.cpu().numpy()
Exemplo n.º 24
0
def train(train_loader, model, criterion, optimizer, epoch, summary_writer):
    """Run one training epoch of a DB-style (Differentiable Binarization)
    text detector.

    The model emits (probability_map, threshold_map, binarization_map).
    Probability and binarization maps take OHEM-masked dice losses; the
    threshold map takes a masked L1 regression loss with a 10x weight.

    Returns:
        (avg_loss, prob_mean_acc, binary_mean_acc, prob_mean_iou,
         binary_mean_iou) for the epoch.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    # BUGFIX: use a separate meter for the binarization head — reusing
    # `running_metric_text` mixed probability-head updates into the binary
    # scores (cal_text_score accumulates into the meter it is given).
    running_metric_binary = runningScore(2)
    L1_loss = torch.nn.L1Loss()
    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks, ori_imgs,
                    mask_kernels) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())
        # BUGFIX: mask_kernels was left on the CPU and then multiplied with
        # CUDA tensors below, raising a device-mismatch error.
        mask_kernels = Variable(mask_kernels.cuda())

        outputs = model(imgs)
        probability_map, threshold_map, binarization_map = outputs

        # loss for probability_map
        selected_masks = ohem_batch(probability_map, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_probability = criterion(probability_map, gt_texts, selected_masks)

        # loss for binary_map
        binary_selected_masks = ohem_batch(binarization_map, gt_texts,
                                           training_masks)
        binary_selected_masks = Variable(binary_selected_masks.cuda())
        loss_binary = criterion(binarization_map, gt_texts,
                                binary_selected_masks)

        # loss for threshold_map (L1 over valid kernel pixels only)
        loss_thresh = L1_loss(threshold_map * mask_kernels,
                              gt_kernels * mask_kernels)

        loss = loss_probability + loss_binary + 10 * loss_thresh

        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_prob = cal_text_score(probability_map, gt_texts, training_masks,
                                    running_metric_text)
        score_binary = cal_text_score(binarization_map, gt_texts,
                                      training_masks, running_metric_binary)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            # visualization
            global_step = epoch * len(train_loader) + batch_idx
            summary_writer.add_images('gt/img',
                                      ori_imgs[:2],
                                      global_step=global_step)
            summary_writer.add_images('gt/score_map',
                                      torch.unsqueeze(gt_texts[:2], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/thresh_map',
                                      torch.unsqueeze(gt_kernels[:2], 1),
                                      global_step=global_step)
            summary_writer.add_images('predicition/score_map',
                                      torch.sigmoid(probability_map[:2]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/binary_map',
                                      torch.sigmoid(binarization_map[:2]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/threshold_map',
                                      torch.sigmoid(threshold_map[:2]),
                                      global_step=global_step)

            summary_writer.add_scalar('loss/text_loss',
                                      loss_probability,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/binary_loss',
                                      loss_binary,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/thresh_loss',
                                      loss_thresh,
                                      global_step=global_step)

            summary_writer.add_scalar('metric/acc_t',
                                      score_prob['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/acc_b',
                                      score_binary['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_t',
                                      score_prob['Mean IoU'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_b',
                                      score_binary['Mean IoU'],
                                      global_step=global_step)

            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc_t: .4f} | Acc_b: {acc_b: .4f} | IOU_t: {iou_t: .4f} | IOU_b: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc_t=score_prob['Mean Acc'],
                acc_b=score_binary['Mean Acc'],
                iou_t=score_prob['Mean IoU'],
                iou_k=score_binary['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_prob['Mean Acc'], score_binary['Mean Acc'],
            score_prob['Mean IoU'], score_binary['Mean IoU'])
Exemplo n.º 25
0
def train(train_loader, model, criterion, optimizer, epoch, summary_writer):
    """Run one training epoch of a gaussian-map text detector.

    The model regresses a single gaussian score map at 1/4 input resolution
    (labels are subsampled with ``[:, ::4, ::4]``). Loss combines weighted
    MSE terms from ``weighted_regression`` with a dice loss on the region
    (score > 0.4) part of the ground truth.

    Returns:
        (avg_loss, center_mean_acc, center_mean_iou) for the epoch.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    L1_loss = torch.nn.L1Loss()
    end = time.time()
    for batch_idx, (imgs, gt_texts, training_masks, ori_imgs,
                    border_map) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        # Ground truth and masks are subsampled 4x to match the output stride.
        gt_texts = Variable(gt_texts[:, ::4, ::4].cuda())
        training_masks = Variable(training_masks[:, ::4, ::4].cuda())
        # NOTE(review): border_map is moved to GPU but only used by the
        # commented-out border loss below — confirm whether it is still needed.
        border_map = Variable(border_map.cuda())
        outputs = model(imgs)
        gaussian_map = outputs
        #         gaussian_map, center_map, region_map = outputs
        weighted_mse_loss, mse_region_loss, loss_center = weighted_regression(
            gaussian_map, gt_texts, training_masks)

        # Center region: ground-truth score above 0.7, zero elsewhere.
        center_gt = torch.where(gt_texts > 0.7, gt_texts,
                                torch.zeros_like(gt_texts))
        #         center_mask = torch.where(gt_texts > 0.7, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))

        # Text region: ground-truth score above 0.4, zero elsewhere.
        region_gt = torch.where(gt_texts > 0.4, gt_texts,
                                torch.zeros_like(gt_texts))
        #         region_mask = torch.where(gt_texts > 0.4, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))

        # loss for center_map
        #         loss_center_dice = criterion(gaussian_map, center_gt, training_masks)

        # loss for region_map
        loss_region_dice = criterion(gaussian_map, region_gt, training_masks)

        # loss for border_map
        #         border_mask = 1. - (center_other - border_map)
        #         loss_border = criterion(gaussian_map, gt_texts, training_masks)

        loss = loss_center + weighted_mse_loss + mse_region_loss + loss_region_dice
        #         print("loss:", loss_center, "loss_region:", loss_region, "weighted_mse_loss:", weighted_mse_loss)
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Score the center prediction (threshold 0.8) against the full GT.
        score_center = cal_text_score(gaussian_map, gt_texts, training_masks,
                                      running_metric_text, 0, 0.8)
        #         score_region = cal_text_score(gaussian_map, gt_texts, training_masks * region_mask, running_metric_text, 0, 0.2)

        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 20 == 0:
            # visualization
            global_step = epoch * len(train_loader) + batch_idx
            maps = torch.sigmoid(gaussian_map[0:1])
            center_map = torch.where(maps > 0.8, maps, torch.zeros_like(maps))
            text_map = torch.where(maps > 0.4, maps, torch.zeros_like(maps))
            summary_writer.add_images('gt/img',
                                      ori_imgs[0:1],
                                      global_step=global_step)
            summary_writer.add_images('gt/score_map',
                                      torch.unsqueeze(gt_texts[0:1], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/center_map',
                                      torch.unsqueeze(center_gt[0:1], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/region_map',
                                      torch.unsqueeze(region_gt[0:1], 1),
                                      global_step=global_step)
            #             summary_writer.add_images('gt/border_map', torch.unsqueeze(border_mask[0:1], 1), global_step=global_step)
            summary_writer.add_images('predicition/score_map',
                                      torch.sigmoid(gaussian_map[0:1]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/center_map',
                                      torch.sigmoid(center_map[0:1]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/region_map',
                                      torch.sigmoid(text_map[0:1]),
                                      global_step=global_step)

            summary_writer.add_scalar('loss/reg_loss',
                                      weighted_mse_loss,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/reg_center_loss',
                                      loss_center,
                                      global_step=global_step)
            #             summary_writer.add_scalar('loss/center_dice_loss', loss_center_dice, global_step=global_step)
            summary_writer.add_scalar('loss/region_dice_loss',
                                      loss_region_dice,
                                      global_step=global_step)
            #             summary_writer.add_scalar('loss/border_loss', loss_border, global_step=global_step)
            summary_writer.add_scalar('loss/text_region_loss',
                                      mse_region_loss,
                                      global_step=global_step)

            summary_writer.add_scalar('metric/acc_c',
                                      score_center['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_c',
                                      score_center['Mean IoU'],
                                      global_step=global_step)
            #             summary_writer.add_scalar('metric/acc_t', score_region['Mean Acc'], global_step=global_step)
            #             summary_writer.add_scalar('metric/iou_t', score_region['Mean IoU'], global_step=global_step)

            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_c: {acc_c: .4f} | IOU_c: {iou_c: .4f} '.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc_c=score_center['Mean Acc'],
                iou_c=score_center['Mean IoU'],
                #                 acc_t=score_region['Mean Acc'],
                #                 iou_t=score_region['Mean IoU'],
            )
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_center['Mean Acc'], score_center['Mean IoU'])
def train(
    models_path,
    backend,
    snapshot,
    alpha,
    epochs,
    init_lr,
):
    """Train a PSPNet-style network with an auxiliary classification head.

    Resumes from `snapshot` if given, validates after every epoch, and
    checkpoints the model whenever mean IoU improves.

    Args:
        models_path: directory for checkpoints (created if missing).
        backend: backbone identifier passed to build_network.
        snapshot: optional checkpoint to resume from.
        alpha: weight of the auxiliary loss.
        epochs: number of epochs to run.
        init_lr: initial learning rate (also fed to the poly scheduler).
    """
    #     os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    net, starting_epoch = build_network(snapshot, backend)
    #     net.train()

    models_path = os.path.abspath(os.path.expanduser(models_path))
    os.makedirs(models_path, exist_ok=True)

    # Uniform class weights for NLL loss. (Removed unused per-batch
    # `class_weight` tensor.)
    class_weights = torch.ones(num_classes).cuda()

    # BUGFIX: was `lr=start_lr`, an undefined name — the parameter is init_lr.
    optimizer = optim.Adam(net.parameters(), lr=init_lr, weight_decay=0.0001)
    # Setup Metrics
    running_metrics = runningScore(num_classes)

    best_iou = -100.0
    # Resume training from the checkpoint's epoch.
    for epoch in range(starting_epoch, starting_epoch + epochs):

        seg_criterion = nn.NLLLoss2d(weight=class_weights)
        #         cls_criterion = nn.BCEWithLogitsLoss(weight=class_weights)  # binary classification

        epoch_losses = []
        train_iterator = tqdm(train_loader, total=len(train_loader))

        net.train()
        for x, y, y_cls in train_iterator:

            optimizer.zero_grad()
            x, y, y_cls = Variable(x).cuda(), Variable(y).cuda(), Variable(
                y_cls).cuda()
            #             y: torch.Size([16, 1, 256, 256])

            out, out_cls = net(x)
            #             print('out_cls:', out_cls.size())  # 16,150,256,256

            seg_loss = seg_criterion(out, y.squeeze(1))
            cls_loss = seg_criterion(out_cls, y.squeeze(1))

            loss = seg_loss + alpha * cls_loss

            # BUGFIX: `loss.data[0]` raises on 0-dim tensors in modern
            # PyTorch; `.item()` is the supported scalar accessor.
            epoch_losses.append(loss.item())

            status = '[{0}] loss = {1:0.5f} avg = {2:0.5f}, '.format(
                epoch + 1, loss.item(), np.mean(epoch_losses))
            train_iterator.set_description(status)  # tqdm shows the status line

            loss.backward()
            optimizer.step()

        net.eval()
        # torch.no_grad() replaces the removed `volatile=True` flag.
        with torch.no_grad():
            for i_val, (images_val, labels_val,
                        label_cls) in tqdm(enumerate(val_loader)):
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())

                outputs, outputs_cls = net(
                    images_val)  # outputs = batch, num_classes, H, W

                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = labels_val.data.cpu().numpy()
                running_metrics.update(gt, pred)

        score, class_iou = running_metrics.get_scores()
        running_metrics.reset()
        if score['Mean IoU : \t'] >= best_iou:
            best_iou = score['Mean IoU : \t']
            print("{}_{}_best_model.pkl".format(
                os.path.join(models_path, 'PSPNet'), 'ADEK'))
            torch.save(
                net.state_dict(), "{}_{}_best_model.pkl".format(
                    os.path.join(models_path, 'PSPNet'), 'ADEK'))

        poly_lr_scheduler(optimizer,
                          init_lr,
                          epoch,
                          lr_decay_iter=10,
                          max_iter=100,
                          power=0.9)
Exemplo n.º 27
0

# Collect prediction/ground-truth PNG pairs (natural sort order) and set up
# a 5-class running metric for evaluation.
# BUGFIX: use raw strings for the Windows paths — the originals relied on
# non-escape backslash sequences (\d, \i, \g) that raise SyntaxWarning on
# modern Python and will become errors.
IMG_Path = Path(r"C:\Users\dell\Desktop\tt\img")
IMG_File = natsort.natsorted(list(IMG_Path.glob("*.png")), alg=natsort.PATH)
IMG_Str = [str(p) for p in IMG_File]

GT_Path = Path(r"C:\Users\dell\Desktop\tt\gt")
# GT_Path = Path(r"I:\DVS_dataset\scnn_result\vgg_SCNN_merge\merge")
GT_File = natsort.natsorted(list(GT_Path.glob("*.png")), alg=natsort.PATH)
GT_Str = [str(p) for p in GT_File]
t = time.time()
running_metrics_val = runningScore(5)
# Gray-level RGB triplets identifying each of the 5 classes.
label_values = [[0, 0, 0], [100, 100, 100], [150, 150, 150], [200, 200, 200],
                [250, 250, 250]]


def compute_two(img_path, gt_path, img_path2, gt_path2):
    """Load a prediction image and its ground truth and convert both to
    per-pixel class-index maps using ``label_values``.

    NOTE(review): ``img_path2``/``gt_path2`` are unused in the visible body,
    and the function appears truncated here (no return or metric update
    follows) -- confirm against the full source.
    """
    out = load_image(img_path)
    # gt = load_image(gt_path)
    # Original (translated) comment: do not pass
    # interpolation=cv.INTER_NEAREST, otherwise the result differs --
    # suspected OpenCV bug. NOTE(review): as written, cv.INTER_NEAREST is
    # the third POSITIONAL argument of cv.resize, which is ``dst``, not
    # ``interpolation`` -- likely the real cause of the differing results.
    gt = cv.resize(load_image(gt_path), (512, 256), cv.INTER_NEAREST)
    # val_gt_erode paired with [0,0,0] label value
    # label order: R G B
    # num_classes = len(label_values)

    # One-hot against label_values, then argmax back to class indices.
    gt = util.reverse_one_hot(util.one_hot_it(gt, label_values))
    output_image = util.reverse_one_hot(util.one_hot_it(out, label_values))
Exemplo n.º 28
0
    def validate(self):
        """Run one validation pass over ``self.val_loader``.

        Computes the average cross-entropy loss and segmentation metrics
        (acc, class acc, mIoU, fwavacc, per-class IoU), saves a tiled image
        of up to 9 qualitative results, appends a row to ``log.csv``, and
        checkpoints the model (keeping a copy of the best-mIoU weights).

        Raises:
            ValueError: if the validation loss becomes NaN.
        """

        visualizations = []
        val_metrics = runningScore(self.n_classes)
        val_loss_meter = averageMeter()

        with torch.no_grad():
            self.model.eval()
            for rgb, ir, target in tqdm.tqdm(
                    self.val_loader, total=len(self.val_loader),
                    desc=f'Valid epoch={self.epoch}', ncols=80, leave=False):

                rgb, ir, target = rgb.to(self.device), ir.to(self.device), target.to(self.device)

                # Model consumes both RGB and IR modalities.
                score = self.model(rgb, ir)
                # score = self.model(rgb)

                # Optional per-class weighting supplied by the dataset.
                weight = self.val_loader.dataset.class_weight
                if weight:
                    weight = torch.Tensor(weight).to(self.device)

                loss = CrossEntropyLoss(score, target, weight=weight, reduction='mean', ignore_index=-1)
                loss_data = loss.data.item()
                if np.isnan(loss_data):
                    raise ValueError('loss is nan while validating')

                val_loss_meter.update(loss_data)

                rgbs = rgb.data.cpu()
                irs = ir.data.cpu()

                # Some models return (main, aux, ...) score tuples; the main
                # head is element 0.
                if isinstance(score, (tuple, list)):
                    lbl_pred = score[0].data.max(1)[1].cpu().numpy()
                else:
                    lbl_pred = score.data.max(1)[1].cpu().numpy()
                lbl_true = target.data.cpu()

                # NOTE(review): loop variables shadow the batch tensors above;
                # harmless here since the batch versions are no longer used.
                for rgb, ir, lt, lp in zip(rgbs, irs, lbl_true, lbl_pred):
                    rgb, ir, lt = self.val_loader.dataset.untransform(rgb, ir, lt)
                    val_metrics.update(lt, lp)
                    if len(visualizations) < 9:
                        viz = visualize_segmentation(
                            lbl_pred=lp, lbl_true=lt, img=rgb, ir=ir,
                            n_classes=self.n_classes, dataloader=self.train_loader)
                        visualizations.append(viz)

        acc, acc_cls, mean_iou, fwavacc, cls_iu = val_metrics.get_scores()
        metrics = [acc, acc_cls, mean_iou, fwavacc]

        print(f'\nEpoch: {self.epoch}', f'loss: {val_loss_meter.avg}, mIoU: {mean_iou}')

        # Save the tiled qualitative results for this epoch.
        out = osp.join(self.out, 'visualization_viz')
        if not osp.exists(out):
            os.makedirs(out)
        out_file = osp.join(out, 'epoch{:0>5d}.jpg'.format(self.epoch))
        # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.3; this
        # requires an old SciPy pin (or migration to imageio.imwrite).
        scipy.misc.imsave(out_file, get_tile_image(visualizations))

        # Append one CSV row: epoch, 5 blank train columns, val loss,
        # val metrics, elapsed seconds since training started.
        with open(osp.join(self.out, 'log.csv'), 'a') as f:
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('UTC')) -
                self.timestamp_start).total_seconds()
            log = [self.epoch] + [''] * 5 + \
                  [val_loss_meter.avg] + metrics + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')

        # Always checkpoint; additionally keep a copy of the best-mIoU model.
        mean_iu = metrics[2]
        is_best = mean_iu > self.best_mean_iu
        if is_best:
            self.best_mean_iu = mean_iu
        torch.save({
            'epoch': self.epoch,
            'arch': self.model.__class__.__name__,
            'optim_state_dict': self.optim.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_mean_iu': self.best_mean_iu,
        }, osp.join(self.out, 'checkpoint.pth.tar'))
        if is_best:
            shutil.copy(osp.join(self.out, 'checkpoint.pth.tar'),
                        osp.join(self.out, 'model_best.pth.tar'))

        val_loss_meter.reset()
        val_metrics.reset()

        # Report per-class IoU, by name when the dataset provides names.
        class_name = self.val_loader.dataset.class_names
        if class_name is not None:
            for index, value in enumerate(cls_iu.values()):
                offset = 20 - len(class_name[index])
                print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
        else:
            print("\nyou don't specify class_names, use number instead")
            for key, value in cls_iu.items():
                print(key, f'{value * 100:>.2f}')
Exemplo n.º 29
0
def train(train_loader, model, criterion, optimizer, epoch, tflogger):
    """Train a PSENet-style text detector for one epoch.

    Args:
        train_loader: yields ``(imgs, gt_texts, gt_kernels, training_masks)``
            batches.
        model: network whose output has shape (batch, 1 + K, H, W) --
            channel 0 is the text score map, the remaining K channels are
            shrunk text kernels.
        criterion: loss callable taking ``(pred, gt, selected_mask)`` --
            presumably a dice-style loss; TODO confirm.
        optimizer: optimizer stepping ``model``'s parameters.
        epoch: current epoch index (kept for the existing call signature).
        tflogger: logger exposing ``histo_summary`` for parameter and
            gradient histograms.

    Returns:
        Tuple of floats: (avg loss, text mean acc, kernel mean acc,
        text mean IoU, kernel mean IoU).
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    global globalcounter

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels,
                    training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        # Channel 0: full-text map; remaining channels: shrunk kernel maps.
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # Online hard example mining selects the pixels for the text loss.
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())

        loss_text = criterion(texts, gt_texts, selected_masks)

        # Kernel losses only count pixels the text head already predicts as
        # text (sigmoid > 0.5) inside the valid training mask.
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())

        # Generalized: iterate over however many kernel channels the model
        # emits instead of the previous hard-coded 6.
        num_kernels = kernels.size(1)
        loss_kernels = []
        for i in range(num_kernels):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernels.append(criterion(kernel_i, gt_kernel_i, selected_masks))
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # Fixed 0.7 / 0.3 weighting between text and kernel objectives.
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        # Periodic console progress line.
        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

        # Periodic weight/gradient histogram logging.
        if batch_idx % 100 == 0:
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag,
                                       value.data.detach().cpu().numpy(),
                                       globalcounter)
                tflogger.histo_summary(tag + '/grad',
                                       value.grad.data.detach().cpu().numpy(),
                                       globalcounter)
            globalcounter += 1

    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
Exemplo n.º 30
0
    def train_epoch(self):
        """Train the model for a single epoch.

        Runs validation first on validation epochs, iterates the training
        loader once (forward, cross-entropy loss, backward, step), records
        running metrics, appends a row to ``log.csv``, and steps the LR
        scheduler if one is configured.

        Raises:
            ValueError: if the training loss becomes NaN.
        """
        # Validate before training on the scheduled epochs.
        if self.epoch % self.val_epoch == 0 or self.epoch == 1:
            self.validate()

        self.model.train()
        epoch_metrics = runningScore(self.n_classes)
        loss_meter = averageMeter()

        self.optim.zero_grad()

        progress = tqdm.tqdm(
            self.train_loader, total=len(self.train_loader),
            desc=f'Train epoch={self.epoch}', ncols=80, leave=False)
        for rgb, ir, target in progress:
            self.iter += 1
            assert self.model.training

            rgb = rgb.to(self.device)
            ir = ir.to(self.device)
            target = target.to(self.device)
            # Model consumes both RGB and IR modalities.
            score = self.model(rgb, ir)
            # score = self.model(rgb)

            # Optional per-class weighting supplied by the dataset.
            weight = self.train_loader.dataset.class_weight
            if weight:
                weight = torch.Tensor(weight).to(self.device)

            loss = CrossEntropyLoss(score, target, weight=weight, ignore_index=-1, reduction='mean')

            loss_value = loss.data.item()
            loss_meter.update(loss_value)

            if np.isnan(loss_value):
                raise ValueError('loss is nan while training')

            # loss.backward(retain_graph=True)
            loss.backward()

            self.optim.step()
            self.optim.zero_grad()

            # Some models return (main, aux, ...) tuples; element 0 is the
            # main head.
            main_score = score[0] if isinstance(score, (tuple, list)) else score
            lbl_pred = main_score.data.max(1)[1].cpu().numpy()
            lbl_true = target.data.cpu().numpy()
            epoch_metrics.update(lbl_true, lbl_pred)

        acc, acc_cls, mean_iou, fwavacc, _ = epoch_metrics.get_scores()
        metrics = [acc, acc_cls, mean_iou, fwavacc]

        # Append one CSV row: epoch, train loss + metrics, 5 blank val
        # columns, elapsed seconds since training started.
        with open(osp.join(self.out, 'log.csv'), 'a') as f:
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('UTC')) -
                self.timestamp_start).total_seconds()
            row = [self.epoch] + [loss_meter.avg] + metrics + [''] * 5 + [elapsed_time]
            f.write(','.join(map(str, row)) + '\n')

        if self.scheduler:
            self.scheduler.step()
        if self.epoch % self.val_epoch == 0 or self.epoch == 1:
            lr = self.optim.param_groups[0]['lr']
            print(f'\nCurrent base learning rate of epoch {self.epoch}: {lr:.7f}')

        loss_meter.reset()
        epoch_metrics.reset()