def train(train_loader, model, criteria, optimizer, device, batch_size):
    """Run one training epoch and return aggregate metrics.

    Args:
        train_loader: iterable of (image, mask) batches.
        model: segmentation network (set to train mode here).
        criteria: loss function taking (output, mask).
        optimizer: optimizer stepped once per batch.
        device: torch device for inputs.
        batch_size: batch size used to weight the loss running average.

    Returns:
        dict with keys "loss", "pixel acc", "mean acc", "miou".
    """
    model.train()
    evaluator = Evaluator(21)  # 21 classes — presumably PASCAL VOC; TODO confirm
    evaluator.reset()
    train_loss = AverageMeter("Loss", ":.4")
    progress = ProgressMeter(len(train_loader), train_loss)
    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)
        output = model(image)
        loss = criteria(output, mask)
        # Accumulate confusion-matrix stats on hard predictions.
        predict = np.argmax(output.data.cpu().numpy(), axis=1)
        target = mask.cpu().numpy()
        evaluator.add_batch(target, predict)
        train_loss.update(loss.item(), batch_size)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            progress.print(i)
    # BUG FIX: the original called evaluator.add_batch(target, predict) a
    # second time here, so the final batch was counted twice in every
    # epoch-level metric. The extra call is removed.
    info = {"loss": train_loss.val,
            "pixel acc": evaluator.Pixel_Accuracy(),
            "mean acc": evaluator.Pixel_Accuracy_Class(),
            "miou": evaluator.Mean_Intersection_over_Union()}
    return info
def test(segmentation_module, args=None):
    """Evaluate the clip-based segmentation module over every video in val.txt.

    Accumulates a single confusion matrix across all videos and prints
    pixel accuracy, class accuracy, mIoU and frequency-weighted IoU.
    """
    num_classes = args.num_class
    segmentation_module.eval()
    evaluator = Evaluator(num_classes)
    print('validation')
    with open(os.path.join(args.dataroot, 'val.txt'), 'r') as f:
        videolists = [line[:-1] for line in f.readlines()]
    for video in videolists:
        dataset = TestDataset_clip(args.dataroot, video, args, is_train=True)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batchsize,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             drop_last=False)
        for step, sample in enumerate(loader):
            # process data
            print('[{}]/[{}]'.format(step, len(loader)))
            imgs, gts, clip_imgs, _, _ = sample
            imgs = imgs.cuda(args.start_gpu)
            gts = gts.cuda(args.start_gpu)
            clip_imgs = [frame.cuda(args.start_gpu) for frame in clip_imgs]
            batch_data = {
                'img_data': imgs,
                'seg_label': gts,
                'clipimgs_data': clip_imgs,
            }
            seg_size = (imgs.size(2), imgs.size(3))
            with torch.no_grad():
                scores = segmentation_module(batch_data, segSize=seg_size)
            pred = torch.argmax(scores, dim=1).data.cpu().numpy()
            target = gts.squeeze(1).cpu().numpy()
            # Add batch sample into evaluator
            evaluator.add_batch(target, pred)
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print('Validation:')
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
        Acc, Acc_class, mIoU, FWIoU))
def test(segmentation_module, loader, args=None):
    """Evaluate a per-frame segmentation module on a pre-built loader.

    Uses 42 classes when args.lesslabel is set, otherwise args.num_class.
    Prints pixel accuracy, class accuracy, mIoU and frequency-weighted IoU.
    """
    num_classes = 42 if args.lesslabel else args.num_class
    segmentation_module.eval()
    evaluator = Evaluator(num_classes)
    print('validation')
    for step, sample in enumerate(loader):
        # process data
        print('[{}]/[{}]'.format(step, len(loader)))
        imgs, gts = sample
        imgs = imgs.cuda(args.start_gpu)
        gts = gts.cuda(args.start_gpu)
        batch_data = {'img_data': imgs, 'seg_label': gts}
        seg_size = (imgs.size(2), imgs.size(3))
        with torch.no_grad():
            scores = segmentation_module(batch_data, segSize=seg_size)
        pred = torch.argmax(scores, dim=1).data.cpu().numpy()
        target = gts.squeeze(1).cpu().numpy()
        # Add batch sample into evaluator
        evaluator.add_batch(target, pred)
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print('Validation:')
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
def eval_net(net, data_loader, device):
    """Run one validation pass over `data_loader`.

    Metrics are computed per batch (the Evaluator is reset after every
    batch) and averaged over the number of batches.

    Args:
        net: segmentation network producing [B, 8, H, W] logits.
        data_loader: yields dicts with "image" and "mask" entries.
        device: torch device to run on.

    Returns:
        Tuple of per-batch averages:
        (eval_loss, pixel_acc_avg, mean_iou_avg, fw_iou_avg).
    """
    net.eval()
    val_batch_num = len(data_loader)
    eval_loss = 0
    e = Evaluator(num_class=8)
    pixel_acc_avg = 0
    mean_iou_avg = 0
    fw_iou_avg = 0
    with tqdm(total=val_batch_num, desc='Validation round', unit='batch',
              leave=False) as pbar:
        for idx, batch_samples in enumerate(data_loader):
            batch_image = batch_samples["image"].to(device=device,
                                                    dtype=torch.float32)
            mask_true = batch_samples["mask"].to(device=device,
                                                 dtype=torch.long)
            with torch.no_grad():
                mask_pred = net(batch_image)
            # BUG FIX: the original computed
            #   probs = F.softmax(mask_pred, dim=1).squeeze(0)
            #   pre = torch.argmax(probs, dim=1)
            # For batch size 1 the squeeze(0) dropped the batch dimension,
            # so argmax(dim=1) reduced over image rows instead of classes.
            # Argmax over the class dim of the raw logits is equivalent
            # (softmax is monotonic) and correct for any batch size.
            pre = torch.argmax(mask_pred, dim=1)  # [B, H, W]
            e.add_batch(mask_true.cpu().data.numpy(), pre.cpu().data.numpy())
            pixel_acc_avg += e.Pixel_Accuracy()
            mean_iou_avg += e.Mean_Intersection_over_Union()
            fw_iou_avg += e.Frequency_Weighted_Intersection_over_Union()
            eval_loss += F.cross_entropy(mask_pred, mask_true).item()
            pbar.set_postfix({'eval_loss': eval_loss / (idx + 1)})
            pbar.update()
            # Per-batch metrics: clear the confusion matrix for the next batch.
            e.reset()
    print("pixel_acc_avg:" + str(pixel_acc_avg / val_batch_num))
    print("mean_iou_avg:" + str(mean_iou_avg / val_batch_num))
    print("fw_iou_avg:" + str(fw_iou_avg / val_batch_num))
    net.train()
    return eval_loss / val_batch_num, pixel_acc_avg / val_batch_num, \
        mean_iou_avg / val_batch_num, fw_iou_avg / val_batch_num
def validate(val_loader, model, criteria, device, batch_size):
    """Evaluate `model` on `val_loader` and return aggregate metrics.

    Returns:
        dict with keys "loss" (mean over len(val_loader) batches),
        "pixel acc", "mean acc" and "miou".
    """
    model.eval()
    evaluator = Evaluator(21)
    evaluator.reset()
    losses = []
    with torch.no_grad():
        for image, mask in tqdm(val_loader):
            image = image.to(device)
            mask = mask.to(device)
            output = model(image)
            losses.append(criteria(output, mask).item())
            hard_pred = np.argmax(output.data.cpu().numpy(), axis=1)
            evaluator.add_batch(mask.cpu().numpy(), hard_pred)
    return {
        "loss": sum(losses) / len(val_loader),
        "pixel acc": evaluator.Pixel_Accuracy(),
        "mean acc": evaluator.Pixel_Accuracy_Class(),
        "miou": evaluator.Mean_Intersection_over_Union(),
    }
def main(cfg, gpu, args):
    """Build the encoder/decoder, evaluate every video in the split, and
    print global and per-video-averaged segmentation metrics.

    Also pickles ([per-video mIoU list], [video names]) to vmiou_hr.pkl.
    """
    num_class = args.num_class
    torch.cuda.set_device(gpu)
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder,
        fc_dim=cfg.MODEL.fc_dim,
        num_class=num_class,
        weights=cfg.MODEL.weights_decoder,
        use_softmax=True)
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
    segmentation_module.cuda(args.start_gpu)
    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        videolists = [line[:-1] for line in f.readlines()]
    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset for each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    v = []  # per-video mIoU scores
    n = []  # matching video names
    for video in videolists:
        eval_video.reset()
        dataset_test = TestDataset(args.dataroot, video, args)
        loader_test = torch.utils.data.DataLoader(
            dataset_test,
            batch_size=args.batchsize,
            shuffle=False,
            num_workers=5,
            drop_last=False)
        # Main loop
        test(segmentation_module, loader_test, gpu, args, evaluator,
             eval_video, video)
        v_mIOU = eval_video.Mean_Intersection_over_Union()
        # BUG FIX: the original did v.append(v) — appending the list to
        # itself instead of recording the per-video score.
        v.append(v_mIOU)
        n.append(video)
        print(video, v_mIOU)
        total_vmIOU += v_mIOU
        v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
        total_vfwIOU += v_fwIOU
    with open("vmiou_hr.pkl", 'wb') as f:
        pkl.dump([v, n], f)
    total_vmIOU = total_vmIOU / total_video
    total_vfwIOU = total_vfwIOU / total_video
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}".format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    print('Inference done!')
def main(cfg, gpu, args):
    """Build the requested video-segmentation model variant, load weights,
    evaluate every video in the split, and print global metrics, per-video
    averages, and a video-consistency (VC) accuracy score.
    """
    # 42 classes for the reduced label set, otherwise the configured count.
    if args.lesslabel:
        num_class = 42
    else:
        num_class = args.num_class
    torch.cuda.set_device(gpu)
    # Network Builders
    if args.method == 'tdnet':
        # td4_psp builds its own backbone; no separate encoder/decoder.
        segmentation_module = td4_psp(args=args, backbone='resnet18')
    else:
        net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 weights='')
        net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder,
                                                 fc_dim=cfg.MODEL.fc_dim,
                                                 num_class=num_class,
                                                 weights='',
                                                 use_softmax=True)
        crit = nn.NLLLoss(ignore_index=-1)
        # Select the wrapper module for the chosen method.
        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit,
                                          args, cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit, args,
                                      cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)
        elif args.method == 'our_warp':
            segmentation_module = ClipWarpNet(net_encoder, net_decoder, crit,
                                              args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder, net_decoder, crit,
                                          args)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder, net_decoder, crit,
                                               args)
        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder, crit, args)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder, crit, args)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder, crit, args)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder, crit, args)
        else:
            raise NotImplementedError
    segmentation_module.cuda(args.start_gpu)
    to_load = torch.load(args.load,
                         map_location=torch.device("cuda:" + str(args.start_gpu)))
    new_state_dict = OrderedDict()
    for k, v in to_load.items():
        # Take from the 8th character to the end, which exactly strips the
        # leading `module.` prefix added by DataParallel checkpoints.
        name = k[7:]
        new_state_dict[name] = v  # keep each value under its stripped key
    segmentation_module.load_state_dict(new_state_dict)
    if args.gpu_num > 1:
        # Replicate across consecutive GPU ids starting at args.start_gpu.
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        patch_replication_callback(segmentation_module)
    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        lines = f.readlines()
        videolists = [line[:-1] for line in lines]
    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset for each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    total_VC_acc = []  # video-consistency accuracies across all clips
    for video in videolists:
        eval_video.reset()
        # clip_psp / clip_ocr consume long clips; everything else short clips.
        if args.method == 'clip_psp' or args.method == 'clip_ocr':
            test_dataset = TestDataset_longclip(args.dataroot, video, args,
                                                is_train=False)
        else:
            test_dataset = TestDataset_clip(args.dataroot, video, args,
                                            is_train=False)
        loader_test = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=args.batchsize,
                                                  shuffle=False,
                                                  num_workers=0,
                                                  drop_last=False)
        ####
        if args.method == 'nonlocal3d':
            gtlist_, predlist_, h, w = test_all(segmentation_module,
                                                loader_test, gpu, args,
                                                evaluator, eval_video, video)
        else:
            gtlist_, predlist_, h, w = test(segmentation_module, loader_test,
                                            gpu, args, evaluator, eval_video,
                                            video)
        # Video-consistency accuracy over windows of vc_clip_num frames.
        accs = get_common(gtlist_, predlist_, args.vc_clip_num, h, w)
        print(sum(accs) / len(accs))
        total_VC_acc.extend(accs)
        ####
        v_mIOU = eval_video.Mean_Intersection_over_Union()
        total_vmIOU += v_mIOU
        v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
        print(video, v_mIOU)
        total_vfwIOU += v_fwIOU
    total_vmIOU = total_vmIOU / total_video
    total_vfwIOU = total_vfwIOU / total_video
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print(
        "Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}"
        .format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    VC_Acc = np.array(total_VC_acc)
    VC_Acc = np.nanmean(VC_Acc)  # nanmean: some windows may yield NaN accs
    print("Video Consistency num :{} acc:{}".format(args.vc_clip_num, VC_Acc))
    print('Inference done!')
# NOTE(review): this is a fragment of a temporal-consistency (TC) scorer whose
# enclosing function/loop begins outside this view; original indentation was
# lost, so loop nesting below is unknown — verify against the full file.
flow = padder.unpad(flow)
flow = flow.data.cpu()
# Load the saved predictions for the current frame and the next frame.
pred = Image.open(
    os.path.join(result_dir, video, imgname.split('.')[0] + '.png'))
next_pred = Image.open(
    os.path.join(result_dir, video, next_imgname.split('.')[0] + '.png'))
pred = torch.from_numpy(np.array(pred))
next_pred = torch.from_numpy(np.array(next_pred))
# Add batch and channel dims so flowwarp sees a [1, 1, H, W] float map.
next_pred = next_pred.unsqueeze(0).unsqueeze(0).float()
# print(next_pred)
# Warp the next frame's prediction back with the optical flow, then compare
# it to the current frame's prediction via the segmentation evaluator.
warp_pred = flowwarp(next_pred, flow)
# print(warp_pred)
warp_pred = warp_pred.int().squeeze(1).numpy()
pred = pred.unsqueeze(0).numpy()
evaluator.add_batch(pred, warp_pred)
# v_mIoU = evaluator.Mean_Intersection_over_Union()
# total_TC+=v_mIoU
# print('processed video : {} score:{}'.format(video,v_mIoU))
#TC = total_TC/len(list_)
# TC score = mIoU between warped next-frame and current-frame predictions.
TC = evaluator.Mean_Intersection_over_Union()
print("TC score is {}".format(TC))
print(split)
print(result_dir)
import sys

# Offline evaluation script: compares predicted mask PNGs (under sys.argv[2])
# against ground-truth masks (under sys.argv[1]/data/<video>/mask) for every
# video listed in val.txt, then prints the aggregate metrics.
eval_ = Evaluator(124)  # 124 classes — presumably the VSPW label set; TODO confirm
eval_.reset()
DIR = sys.argv[1]
split = 'val.txt'
with open(os.path.join(DIR, split), 'r') as f:
    lines = f.readlines()
# BUG FIX: the original wrapped this comprehension in `for line in lines:`,
# rebuilding the identical list once per line (accidental O(n^2)).
# Building it once is equivalent.
videolist = [line[:-1] for line in lines]
PRED = sys.argv[2]
for video in videolist:
    for tar in os.listdir(os.path.join(DIR, 'data', video, 'mask')):
        pred = os.path.join(PRED, video, tar)
        tar_ = Image.open(os.path.join(DIR, 'data', video, 'mask', tar))
        # Prepend a batch axis: Evaluator.add_batch expects [N, H, W].
        tar_ = np.array(tar_)[np.newaxis, :]
        pred_ = np.array(Image.open(pred))[np.newaxis, :]
        eval_.add_batch(tar_, pred_)
Acc = eval_.Pixel_Accuracy()
Acc_class = eval_.Pixel_Accuracy_Class()
mIoU = eval_.Mean_Intersection_over_Union()
FWIoU = eval_.Frequency_Weighted_Intersection_over_Union()
print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
    Acc, Acc_class, mIoU, FWIoU))
def main(cfg, gpu, args):
    """Build encoder/decoder, load a (DataParallel) checkpoint, evaluate every
    video in the split, and print global and per-video-averaged metrics.

    For the 'test' split there is no ground truth, so per-video metric
    computation is skipped.
    """
    num_class = args.num_class
    torch.cuda.set_device(gpu)
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(arch=cfg.MODEL.arch_encoder,
                                             fc_dim=cfg.MODEL.fc_dim,
                                             weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(arch=cfg.MODEL.arch_decoder,
                                             fc_dim=cfg.MODEL.fc_dim,
                                             num_class=num_class,
                                             weights=cfg.MODEL.weights_decoder,
                                             use_softmax=True)
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
    to_load = torch.load(args.load,
                         map_location=torch.device("cuda:" + str(args.start_gpu)))
    new_state_dict = OrderedDict()
    for k, v_ in to_load.items():
        # Strip the leading `module.` prefix added by DataParallel checkpoints.
        name = k[7:]
        new_state_dict[name] = v_
    segmentation_module.load_state_dict(new_state_dict)
    print('load model parameters')
    segmentation_module.cuda(args.start_gpu)
    with open(os.path.join(args.dataroot, args.split + '.txt')) as f:
        videolists = [line[:-1] for line in f.readlines()]
    # Dataset and Loader
    evaluator = Evaluator(num_class)   # accumulates over the whole split
    eval_video = Evaluator(num_class)  # reset for each video
    evaluator.reset()
    eval_video.reset()
    total_vmIOU = 0.0
    total_vfwIOU = 0.0
    total_video = len(videolists)
    v = []  # per-video mIoU scores
    n = []  # matching video names
    for video in videolists:
        eval_video.reset()
        dataset_test = TestDataset(args.dataroot, video, args)
        loader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=args.batchsize,
                                                  shuffle=False,
                                                  num_workers=5,
                                                  drop_last=False)
        # Main loop
        test(segmentation_module, loader_test, gpu, args, evaluator,
             eval_video, video)
        if args.split != 'test':
            v_mIOU = eval_video.Mean_Intersection_over_Union()
            # BUG FIX: the original did v.append(v) — appending the list to
            # itself instead of recording the per-video score.
            v.append(v_mIOU)
            n.append(video)
            print(video, v_mIOU)
            total_vmIOU += v_mIOU
            v_fwIOU = eval_video.Frequency_Weighted_Intersection_over_Union()
            total_vfwIOU += v_fwIOU
    if args.split != 'test':
        total_vmIOU = total_vmIOU / total_video
        total_vfwIOU = total_vfwIOU / total_video
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print(
        "Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}, video mIOU: {}, video fwIOU: {}"
        .format(Acc, Acc_class, mIoU, FWIoU, total_vmIOU, total_vfwIOU))
    print('Inference done!')