class Tester(object):
    """Run a trained DeepLab checkpoint over a test set on CPU.

    Per-sample predictions and ground-truth masks are saved as
    color-mapped PNGs, and pixel accuracies are printed at the end.
    """

    def __init__(self, args):
        """Build the model/test loader and load the checkpoint at ``args.model``.

        Raises:
            RuntimeError: if ``args.model`` is not an existing file.
        """
        if not os.path.isfile(args.model):
            # BUG FIX: original called the non-existent str method
            # ``fromat`` here, raising AttributeError instead of the
            # intended RuntimeError with the checkpoint path.
            raise RuntimeError("no checkpoint found at '{}'".format(args.model))
        self.args = args
        self.color_map = get_pascal_labels()
        self.test_loader, self.ids, self.nclass = make_data_loader(args)
        # Define model
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=False,
                        freeze_bn=False)
        self.model = model
        # Inference is done on CPU; remap checkpoint tensors accordingly.
        device = torch.device('cpu')
        checkpoint = torch.load(args.model, map_location=device)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.evaluator = Evaluator(self.nclass)

    def save_image(self, array, id, op):
        """Save a 2-D label map as a color PNG under ``args.save_path``.

        Args:
            array: 2-D array of class indices.
            id: sample identifier embedded in the file name.
            op: 0 -> file suffixed ``pred``; any other value -> ``gt``.
        """
        text = 'pred' if op == 0 else 'gt'
        file_name = str(id) + '_' + text + '.png'
        r = array.copy()
        g = array.copy()
        b = array.copy()
        # Replace every class index by its RGB color, channel by channel.
        for i in range(self.nclass):
            r[array == i] = self.color_map[i][0]
            g[array == i] = self.color_map[i][1]
            b[array == i] = self.color_map[i][2]
        rgb = np.dstack((r, g, b))
        save_img = Image.fromarray(rgb.astype('uint8'))
        save_img.save(self.args.save_path + os.sep + file_name)

    def test(self):
        """Infer over the full test loader, save images, print accuracies."""
        self.model.eval()
        self.evaluator.reset()
        # tbar = tqdm(self.test_loader, desc='\r')
        for i, sample in enumerate(self.test_loader):
            image, target = sample['image'], sample['label']
            with torch.no_grad():
                output = self.model(image)
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Only the first item of the batch is saved to disk.
            self.save_image(pred[0], self.ids[i], 0)
            self.save_image(target[0], self.ids[i], 1)
            self.evaluator.add_batch(target, pred)
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        print('Acc:{}, Acc_class:{}'.format(Acc, Acc_class))
def validation(epoch, model, args, criterion, nclass, test_tag=False):
    """Evaluate ``model`` over one data split and print loss/accuracy metrics.

    Args:
        epoch: current epoch index (0-based; printed 1-based in progress).
        model: segmentation network, called as ``model(inputs)``.
        args: exposes ``args.data_dict`` holding the numpy data/mask arrays.
        criterion: loss callable ``criterion(output, target)``.
        nclass: number of segmentation classes for the Evaluator.
        test_tag: when True, iterates the ``valid_*`` arrays and also dumps
            input/target/pred volumes as NIfTI files under ``./pred/``.
            NOTE(review): naming looks inverted -- ``test_tag=True`` selects
            the *valid* arrays and False selects *test*; confirm against
            callers before renaming or swapping.
    """
    model.eval()
    losses = 0.0
    evaluator = Evaluator(nclass)
    evaluator.reset()
    if test_tag:
        num_img = args.data_dict['num_valid']
    else:
        num_img = args.data_dict['num_test']
    for i in range(num_img):
        if test_tag:
            inputs = torch.FloatTensor(args.data_dict['valid_data'][i]).cuda()
            target = torch.FloatTensor(args.data_dict['valid_mask'][i]).cuda()
        else:
            inputs = torch.FloatTensor(args.data_dict['test_data'][i]).cuda()
            target = torch.FloatTensor(args.data_dict['test_mask'][i]).cuda()
        with torch.no_grad():
            output = model(inputs)
            loss_val = criterion(output, target)
        # BUG FIX: the progress total previously printed
        # args.data_dict['num_train']; it now reports this split's size.
        print('epoch: {0}\t'
              'iter: {1}/{2}\t'
              'loss: {loss:.4f}'.format(epoch + 1, i + 1, num_img,
                                        loss=loss_val))
        pred = output.data.cpu().numpy()
        target = target.cpu().numpy()
        pred = np.argmax(pred, axis=1)
        evaluator.add_batch(target, pred)
        losses += loss_val
        if test_tag:
            # save input, target, pred
            pred_save_dir = './pred/'
            # BUG FIX: ``inputs`` is still a CUDA tensor here;
            # SimpleITK requires a numpy array (``target`` was already
            # converted above).
            sitk.WriteImage(sitk.GetImageFromArray(inputs.cpu().numpy()),
                            pred_save_dir + 'input_{}.nii.gz'.format(i))
            sitk.WriteImage(sitk.GetImageFromArray(target),
                            pred_save_dir + 'target_{}.nii.gz'.format(i))
            sitk.WriteImage(
                sitk.GetImageFromArray(pred),
                pred_save_dir + 'pred_{}_{}.nii.gz'.format(i, epoch))
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    if test_tag:
        print('Test:')
    else:
        print('Validation:')
    print('[Epoch: %d, numImages: %5d]' % (epoch, num_img))
    print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
        Acc, Acc_class, mIoU, FWIoU))
    print('Loss: %.3f' % losses)
def evals(arch='res18'):
    """Report class IoU & mIoU, Acc & mAcc for a BiSeNet checkpoint on SUNRGBD.

    Args:
        arch: 'res18' or 'res101'; selects the backbone, channel width and
            the hard-coded checkpoint path.

    Raises:
        NotImplementedError: for any other ``arch`` value.
    """
    trainset, valset, testset = build_datasets(dataset='SUNRGBD',
                                               base_size=512,
                                               crop_size=512)
    # load model
    if arch == 'res18':
        model = BiSeNet(37, context_path='resnet18', in_planes=32)
        load_state_dict(
            model,
            ckpt_path=
            'runs/SUNRGBD/kd_pi_lr1e-3_Jul28_002404/checkpoint.pth.tar')
    elif arch == 'res101':
        model = BiSeNet(37, context_path='resnet101', in_planes=64)
        load_state_dict(
            model,
            ckpt_path=
            'runs/SUNRGBD/res101_inp64_deconv_Jul26_205859/checkpoint.pth.tar')
    else:
        raise NotImplementedError
    model.eval()
    model.cuda()
    evaluator = Evaluator(testset.num_classes)
    evaluator.reset()
    print('imgs:', len(testset))
    # FIX: wrap pure inference in no_grad -- the original built autograd
    # graphs for every test image, wasting GPU memory.
    with torch.no_grad():
        for sample in tqdm(testset):  # already transformed
            image, target = sample['img'], sample['target']
            image = image.unsqueeze(0).cuda()
            pred = model(image)
            pred = F.interpolate(pred,
                                 size=(512, 512),
                                 mode='bilinear',
                                 align_corners=True)
            pred = torch.argmax(pred, dim=1).squeeze().cpu().numpy()
            target = target.numpy()
            evaluator.add_batch(target, pred)
    print('PixelAcc:', evaluator.Pixel_Accuracy())
    print('mAcc')  # mean of per-class accuracies
    Accs = evaluator.Acc_of_each_class()
    print(np.nanmean(Accs))  # mAcc, mean of non-NaN elements
    approx_print(Accs)
    print('mIoU')
    IOUs = evaluator.IOU_of_each_class()
    print(np.nanmean(IOUs))  # mIoU
    approx_print(IOUs)
def run(self, img_tensor, target, verbose=0, save_to=None):
    """Run metric on one image-saliency pair.

    Modified from:
    https://github.com/eclique/RISE/blob/master/evaluation.py

    Deletes pixels from the input in decreasing-saliency order, re-running
    the model after each deletion step and recording mIoU against ``target``.

    Args:
        img_tensor (Tensor): normalized image batch tensor.
        target: per-sample ground-truth label maps (indexable by sample).
        verbose (int): in [0, 1, 2]. 0 - return list of scores.
            1 - also plot final step. 2 - also plot every step and print
            2 top classes. save_to (str): directory to save every step
            plots to. (NOTE(review): verbose/save_to are not used in this
            body -- presumably consumed by the upstream RISE plotting code.)
    Return:
        robust_loss: list with one ``self.get_gradients(...)`` result per
        sample, computed over that sample's mIoU trajectory.
    """
    # Get the prediction pixels by taking the max logit across the 21 classes
    n_samples = img_tensor.shape[0]
    # torch.max over dim 1 returns (values, indices): ``pred`` holds the max
    # logits, ``explanation`` the per-pixel argmax class labels.
    pred, explanation = torch.max(self.model(img_tensor.cuda()), (1))
    evaluator = Evaluator(21)
    # The number of steps is pixel by pixel, TODO is to change this to a set
    # of pixels grouped around the chosen pixel
    n_steps = (
        HW + self.step - 1
    ) // self.step  # HW is the area of the images, they made it a global variable....
    start = img_tensor.clone()  # original input
    finish = self.substrate_fn(img_tensor)  # aim to end with all 0's
    # miou = np.empty(n_steps + 1)
    miou = np.empty((n_samples, n_steps + 1))  # one mIoU trajectory per sample
    # Coordinates of pixels in order of decreasing saliency
    # orders the pixels from most important to least by providing the index
    salient_order = np.flip(np.argsort(explanation.reshape(
        -1, HW).detach().cpu().numpy(),
                                       axis=1),
                            axis=-1)  # the indexes of proper order
    # Row selector used for fancy-indexing each sample's coords below.
    r = np.arange(n_samples).reshape(n_samples, 1)
    for i in tqdm(range(n_steps + 1), desc='Deleting Pixels'):
        # Re-run the model on the partially-deleted input.
        pred, explanation = torch.max(self.model(start.cuda()), (1))
        for j in range(n_samples):
            # Score each sample individually, resetting between samples.
            evaluator.add_batch(target[j].detach().cpu().numpy(),
                                explanation[j].detach().cpu().numpy())
            score = evaluator.Mean_Intersection_over_Union()
            evaluator.reset()
            miou[j][i] = score
        if i < n_steps:
            # coords = salient_order[:, self.step * i:self.step * (i + 1)]
            # start.cpu().numpy().reshape(1, 3, HW)[0, :, coords] = finish.cpu().numpy().reshape(1, 3, HW)[0, :, coords]
            coords = salient_order[:, self.step * i:self.step * (i + 1)]
            # In-place write through a numpy view of ``start``: copies the
            # substrate values into the next ``self.step`` most-salient
            # pixel positions of every sample.
            start.cpu().numpy().reshape(
                n_samples, 3, HW)[r, :, coords] = finish.cpu().numpy().reshape(
                    n_samples, 3, HW)[r, :, coords]
    robust_loss = [self.get_gradients(miou[x]) for x in range(n_samples)]
    return robust_loss
def forward_all(net_inference, dataloader, visualize=False, opt=None):
    """Run ``net_inference`` over ``dataloader`` and print Pascal-VOC metrics.

    Args:
        net_inference: callable returning either a logits Tensor or a
            dict-like of activations (the last entry is taken as logits).
        dataloader: yields dicts with 'image' and 'label' tensors.
        visualize: when True, shows image/gt/pred windows via OpenCV per item.
        opt: optional options object; when given, results are appended to
            ``seg_result.txt`` together with the quantization settings.
    """
    evaluator = Evaluator(21)  # 21 Pascal VOC classes
    evaluator.reset()
    with torch.no_grad():
        for ii, sample in enumerate(dataloader):
            image, label = sample['image'].cuda(), sample['label'].cuda()
            activations = net_inference(image)
            image = image.cpu().numpy()
            label = label.cpu().numpy().astype(np.uint8)
            # FIX: idiomatic isinstance check instead of ``type(...) != ...``.
            if isinstance(activations, torch.Tensor):
                logits = activations
            else:
                # Dict-like activations: the last entry holds the logits.
                logits = activations[list(activations.keys())[-1]]
            pred = torch.max(logits, 1)[1].cpu().numpy().astype(np.uint8)
            evaluator.add_batch(label, pred)
            # print(label.shape, pred.shape)
            if visualize:
                for jj in range(sample["image"].size()[0]):
                    segmap_label = decode_segmap(label[jj], dataset='pascal')
                    segmap_pred = decode_segmap(pred[jj], dataset='pascal')
                    img_tmp = np.transpose(image[jj], axes=[1, 2, 0])
                    # Undo ImageNet normalization back to displayable 0-255.
                    img_tmp *= (0.229, 0.224, 0.225)
                    img_tmp += (0.485, 0.456, 0.406)
                    img_tmp *= 255.0
                    img_tmp = img_tmp.astype(np.uint8)
                    # OpenCV expects BGR channel order.
                    cv2.imshow('image', img_tmp[:, :, [2, 1, 0]])
                    cv2.imshow('gt', segmap_label)
                    cv2.imshow('pred', segmap_pred)
                    cv2.waitKey(0)
    Acc = evaluator.Pixel_Accuracy()
    Acc_class = evaluator.Pixel_Accuracy_Class()
    mIoU = evaluator.Mean_Intersection_over_Union()
    FWIoU = evaluator.Frequency_Weighted_Intersection_over_Union()
    print("Acc: {}".format(Acc))
    print("Acc_class: {}".format(Acc_class))
    print("mIoU: {}".format(mIoU))
    print("FWIoU: {}".format(FWIoU))
    if opt is not None:
        with open("seg_result.txt", 'a+') as ww:
            ww.write(
                "{}, quant: {}, relu: {}, equalize: {}, absorption: {}, correction: {}, clip: {}, distill_range: {}\n"
                .format(opt.dataset, opt.quantize, opt.relu, opt.equalize,
                        opt.absorption, opt.correction, opt.clip_weight,
                        opt.distill_range))
            ww.write("Acc: {}, Acc_class: {}, mIoU: {}, FWIoU: {}\n\n".format(
                Acc, Acc_class, mIoU, FWIoU))
def eval():  # NOTE(review): shadows the builtin ``eval``; kept for callers.
    """Evaluate a Res34Unet checkpoint on the JinNan validation set.

    Builds the dataset/loader from parsed CLI args, loads the checkpoint,
    runs inference batch by batch, and prints per-class pixel accuracy and
    mean IoU.
    """
    args = parse_args()
    valset = JinNanDataset(images_dir=args.images_dir,
                           maskes_dir=args.maskes_dir,
                           images_list=args.images_list,
                           transform=transforms.Compose([
                               FixScaleCrop(512),
                               Normalize(mean=(0.485, 0.456, 0.406),
                                         std=(0.229, 0.224, 0.225)),
                               ToTensor()
                           ]))
    # NOTE(review): shuffle=True is unusual for an evaluation loader --
    # metrics are order-independent, but confirm it is intentional.
    valloader = DataLoader(valset,
                           batch_size=args.batch,
                           shuffle=True,
                           num_workers=0)
    print("starting loading the net and model")
    net = Res34Unet(3, 6)  # 3 input channels, 6 classes
    # net = PAN34(3, 6)
    #net = PAN50(3, 6)
    net.load_state_dict(torch.load(args.checkpoint_path)["model_state_dict"])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if args.cuda:
        # Comma-separated GPU ids, e.g. "0,1" -> [0, 1].
        args.gpus = [int(x) for x in args.gpus.split(",")]
        net = nn.DataParallel(net, device_ids=args.gpus)
    net.to(device)
    net.eval()
    print("finishing loading the net and model")
    print("start validating")
    evaluator = Evaluator(args.num_classes)
    evaluator.reset()
    with torch.no_grad():
        for i, data in enumerate(valloader, 0):
            print("calculate %d batch" % (i + 1))
            # get the inputs
            inputs = data["image"].to(device)
            labels = data["mask"]
            # forward
            outputs = net(inputs)
            outputs = outputs.cpu().numpy()
            outputs = np.argmax(outputs, axis=1)
            # add batch
            evaluator.add_batch(labels.numpy(), outputs)
    # NOTE(review): lowercase method names here differ from the
    # Pixel_Accuracy_Class/... spelling used elsewhere in this file --
    # presumably a different Evaluator implementation; verify.
    ACC = evaluator.pixel_accuracy_class()
    MIoU = evaluator.mean_intersection_over_union()
    print("pixel accuracy class:", ACC)
    print("mean intersection over union:", MIoU)
    print("Finished validating")
def val_seg(model,
            dataLoader,
            epoch,
            loss_fn,
            num_classes,
            logger,
            tensorLogger,
            device='cuda',
            args=None):
    """Validate ``model`` for one epoch.

    Runs the loader under ``torch.no_grad()``, accumulating loss, batch
    inference time and segmentation statistics; logs a status line every
    100 batches and writes epoch-level loss/mIoU scalars to ``tensorLogger``.

    Returns:
        (average loss, overall mIoU) for the epoch.
    """
    model.eval()
    logger.info("Valid | [{:2d}/{}]".format(epoch + 1, args.max_epoch))

    loss_meter = AverageMeter()
    time_meter = AverageMeter()
    seg_eval = Evaluator(num_class=num_classes)
    seg_eval.reset()

    with torch.no_grad():
        for step, (batch_in, batch_gt) in enumerate(dataLoader):
            batch_in = batch_in.to(device=device)
            batch_gt = batch_gt.to(device=device)

            tic = time.time()
            logits = model(batch_in)
            batch_loss = loss_fn(logits, batch_gt)

            # Fold this batch into the confusion-matrix statistics.
            seg_eval.add_batch(batch_gt.detach().cpu().numpy(),
                               np.argmax(logits.detach().cpu().numpy(), axis=1))

            loss_meter.update(batch_loss.item(), batch_in.size(0))
            time_meter.update(time.time() - tic)

            # Status line every 100 batches.
            if step % 100 == 0:
                logger.info(
                    "Valid | {:2d} | [{:4d}/{}] Infer:{:.2f}sec | Loss:{:.4f} | Miou:{:4f} |"
                    .format(epoch + 1, step + 1, len(dataLoader),
                            time_meter.avg, loss_meter.avg,
                            seg_eval.Mean_Intersection_over_Union()[0]))

    Totalmiou = seg_eval.Mean_Intersection_over_Union()[0]
    tensorLogger.add_scalar('val/loss', loss_meter.avg, epoch + 1)
    tensorLogger.add_scalar('val/miou', Totalmiou, epoch + 1)
    return loss_meter.avg, Totalmiou
class Eval(object):
    """Score precomputed predictions against labels with mIoU and fwIoU."""

    def __init__(self, args):
        """Keep ``args`` and construct the evaluator and sample loader."""
        self.args = args
        self.evaluator = Evaluator(args.nclass)
        self.loader = Loader(args)

    def evaluation(self):
        """Accumulate every (label, pred) batch, then print mIoU and fwIoU."""
        self.evaluator.reset()
        progress = tqdm(self.loader)
        for _idx, batch in enumerate(progress):
            _names = batch['name']  # fetched for parity; not used in scoring
            predictions = batch['pred']
            ground_truth = batch['label']
            self.evaluator.add_batch(ground_truth, predictions)
        print("mIoU:", self.evaluator.Mean_Intersection_over_Union())
        print("fwIoU:",
              self.evaluator.Frequency_Weighted_Intersection_over_Union())
def test(model_path):
    """Evaluate a DeepLab(drn) checkpoint on the test split; print Acc/mIoU.

    Args:
        model_path: path to a checkpoint file containing 'state_dict'.
    """
    args = makeargs()
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    train_loader, val_loader, test_loader, nclass = make_data_loader(
        args, **kwargs)
    print('Loading model...')
    model = DeepLab(num_classes=8,
                    backbone='drn',
                    output_stride=args.output_stride,
                    sync_bn=args.sync_bn,
                    freeze_bn=args.freeze_bn)
    model.eval()
    checkpoint = torch.load(model_path)
    model = model.cuda()
    model.load_state_dict(checkpoint['state_dict'])
    print('Done')
    criterion = SegmentationLosses(
        weight=None, cuda=args.cuda).build_loss(mode=args.loss_type)
    evaluator = Evaluator(nclass)
    evaluator.reset()
    print('Model infering')
    test_loss = 0.0
    tbar = tqdm(test_loader, desc='\r')
    for i, sample in enumerate(tbar):
        image, target = sample['image'], sample['label']
        image, target = image.cuda(), target.cuda()
        with torch.no_grad():
            # BUG FIX: the forward pass was commented out in the original,
            # leaving ``output`` undefined (NameError on the next line).
            output = model(image)
            loss = criterion(output, target)
        test_loss += loss.item()
        tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
        pred = output.data.cpu().numpy()
        target = target.cpu().numpy()
        pred = np.argmax(pred, axis=1)
        evaluator.add_batch(target, pred)
        print(image.shape)
    Acc = evaluator.Pixel_Accuracy()
    mIoU = evaluator.Mean_Intersection_over_Union()
    print('testing:')
    print("Acc:{}, mIoU:{},".format(Acc, mIoU))
    print('Loss: %.3f' % test_loss)
class Trainer(object):
    """AutoDeeplab architecture-search trainer.

    Maintains two train loaders: loaderA updates the network weights and
    loaderB updates the architecture parameters (after ``args.alpha_epoch``).
    Supports apex mixed precision, DataParallel and checkpoint resume.
    """

    def __init__(self, args):
        """Build loaders, model, optimizers, scheduler; optionally resume."""
        self.args = args
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()
        # Mixed precision only when apex is importable AND requested.
        self.use_amp = True if (APEX_AVAILABLE and args.use_amp) else False
        self.opt_level = args.opt_level
        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True,
            'drop_last': True
        }
        self.train_loaderA, self.train_loaderB, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset),
                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                raise NotImplementedError
                #if so, which trainloader to use?
                # weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        # Define network
        model = AutoDeeplab(self.nclass, 12, self.criterion,
                            self.args.filter_multiplier,
                            self.args.block_multiplier, self.args.step)
        # SGD over the network weights only (architecture params excluded).
        optimizer = torch.optim.SGD(model.weight_parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        self.model, self.optimizer = model, optimizer
        # Adam over the architecture (alpha) parameters.
        self.architect_optimizer = torch.optim.Adam(
            self.model.arch_parameters(),
            lr=args.arch_lr,
            betas=(0.9, 0.999),
            weight_decay=args.arch_weight_decay)
        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler,
                                      args.lr,
                                      args.epochs,
                                      len(self.train_loaderA),
                                      min_lr=args.min_lr)
        # TODO: Figure out if len(self.train_loader) should be devided by two ? in other module as well
        # Using cuda
        if args.cuda:
            self.model = self.model.cuda()
        # mixed precision
        if self.use_amp and args.cuda:
            keep_batchnorm_fp32 = True if (self.opt_level == 'O2'
                                           or self.opt_level == 'O3') else None
            # fix for current pytorch version with opt_level 'O1'
            if self.opt_level == 'O1' and torch.__version__ < '1.3':
                for module in self.model.modules():
                    if isinstance(module,
                                  torch.nn.modules.batchnorm._BatchNorm):
                        # Hack to fix BN fprop without affine transformation:
                        # give affine-less BN layers frozen identity params.
                        if module.weight is None:
                            module.weight = torch.nn.Parameter(
                                torch.ones(module.running_var.shape,
                                           dtype=module.running_var.dtype,
                                           device=module.running_var.device),
                                requires_grad=False)
                        if module.bias is None:
                            module.bias = torch.nn.Parameter(
                                torch.zeros(module.running_var.shape,
                                            dtype=module.running_var.dtype,
                                            device=module.running_var.device),
                                requires_grad=False)
            # print(keep_batchnorm_fp32)
            # amp.initialize patches both optimizers at once.
            self.model, [self.optimizer, self.architect_optimizer
                         ] = amp.initialize(
                             self.model,
                             [self.optimizer, self.architect_optimizer],
                             opt_level=self.opt_level,
                             keep_batchnorm_fp32=keep_batchnorm_fp32,
                             loss_scale="dynamic")
            print('cuda finished')
        # Using data parallel
        if args.cuda and len(self.args.gpu_ids) > 1:
            if self.opt_level == 'O2' or self.opt_level == 'O3':
                print(
                    'currently cannot run with nn.DataParallel and optimization level',
                    self.opt_level)
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            print('training on multiple-GPUs')
        #checkpoint = torch.load(args.resume)
        #print('about to load state_dict')
        #self.model.load_state_dict(checkpoint['state_dict'])
        #print('model loaded')
        #sys.exit()
        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # if the weights are wrapped in module object we have to clean it
            if args.clean_module:
                # NOTE(review): this direct load before the cleaned copy
                # looks redundant (the cleaned dict is copied right below);
                # confirm it is intentional before removing.
                self.model.load_state_dict(checkpoint['state_dict'])
                state_dict = checkpoint['state_dict']
                new_state_dict = OrderedDict()
                for k, v in state_dict.items():
                    name = k[7:]  # remove 'module.' of dataparallel
                    new_state_dict[name] = v
                # self.model.load_state_dict(new_state_dict)
                copy_state_dict(self.model.state_dict(), new_state_dict)
            else:
                if torch.cuda.device_count() > 1 or args.load_parallel:
                    # self.model.module.load_state_dict(checkpoint['state_dict'])
                    copy_state_dict(self.model.module.state_dict(),
                                    checkpoint['state_dict'])
                else:
                    # self.model.load_state_dict(checkpoint['state_dict'])
                    copy_state_dict(self.model.state_dict(),
                                    checkpoint['state_dict'])
            if not args.ft:
                # self.optimizer.load_state_dict(checkpoint['optimizer'])
                copy_state_dict(self.optimizer.state_dict(),
                                checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """One epoch of weight training; architecture steps after alpha_epoch."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loaderA)
        num_img_tr = len(self.train_loaderA)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            if self.use_amp:
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            self.optimizer.step()
            # Architecture (alpha) update on a batch from loaderB.
            if epoch >= self.args.alpha_epoch:
                search = next(iter(self.train_loaderB))
                image_search, target_search = search['image'], search['label']
                if self.args.cuda:
                    image_search, target_search = image_search.cuda(
                    ), target_search.cuda()
                self.architect_optimizer.zero_grad()
                output_search = self.model(image_search)
                arch_loss = self.criterion(output_search, target_search)
                if self.use_amp:
                    with amp.scale_loss(
                            arch_loss,
                            self.architect_optimizer) as arch_scaled_loss:
                        arch_scaled_loss.backward()
                else:
                    arch_loss.backward()
                self.architect_optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            #self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)
            # Show 10 * 3 inference results each epoch
            if i % (num_img_tr // 10) == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output,
                                             global_step)
            #torch.cuda.empty_cache()
        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)
        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            if torch.cuda.device_count() > 1:
                state_dict = self.model.module.state_dict()
            else:
                state_dict = self.model.state_dict()
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': state_dict,
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)

    def validation(self, epoch):
        """Validate; log metrics and checkpoint when mIoU improves."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)
        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)
        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            if torch.cuda.device_count() > 1:
                state_dict = self.model.module.state_dict()
            else:
                state_dict = self.model.state_dict()
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': state_dict,
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
class Trainer(object):
    """Supervised (source-only) DeepLab trainer with TensorBoard/visdom logs.

    Fixes relative to the original:
      * ``seg_loss_sum`` was referenced in the visdom training plot but never
        defined, raising NameError whenever ``args.visdom`` was set; it is now
        initialized and accumulated.
      * CPU checkpoint resume passed ``map_location`` to ``load_state_dict``
        (which does not accept it, raising TypeError); the remap is now done
        in ``torch.load`` where it belongs.
    """

    def __init__(self, config, args):
        """Build dataloaders, model, optimizer, scheduler; optionally resume."""
        self.args = args
        self.config = config
        self.visdom = args.visdom
        if args.visdom:
            self.vis = visdom.Visdom(env=os.getcwd().split('/')[-1], port=8888)
        # Define Dataloader
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            config)
        # Define network
        self.model = DeepLab(num_classes=self.nclass,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=config.freeze_bn)
        # Backbone at base lr, head at 10x lr.
        train_params = [{
            'params': self.model.get_1x_lr_params(),
            'lr': config.lr
        }, {
            'params': self.model.get_10x_lr_params(),
            'lr': config.lr * 10
        }]
        # Define Optimizer
        self.optimizer = torch.optim.SGD(train_params,
                                         momentum=config.momentum,
                                         weight_decay=config.weight_decay)
        # Define Criterion
        # whether to use class balanced weights
        self.criterion = SegmentationLosses(
            weight=None, cuda=args.cuda).build_loss(mode=config.loss)
        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(config.lr_scheduler, config.lr,
                                      config.epochs, len(self.train_loader),
                                      config.lr_step, config.warmup_epochs)
        self.summary = TensorboardSummary('train_log')
        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model)
            patch_replication_callback(self.model)
            # cudnn.benchmark = True
            self.model = self.model.cuda()
        self.best_pred_source = 0.0
        # Resuming checkpoint
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            # BUG FIX: map_location is a torch.load argument; the original
            # passed it to load_state_dict (TypeError on the CPU path).
            if args.cuda:
                checkpoint = torch.load(args.resume)
                self.model.module.load_state_dict(checkpoint)
            else:
                checkpoint = torch.load(args.resume,
                                        map_location=torch.device('cpu'))
                self.model.load_state_dict(checkpoint)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, args.start_epoch))

    def training(self, epoch):
        """One supervised epoch over the source train loader."""
        train_loss = 0.0
        # BUG FIX: accumulator used by the visdom plot below was undefined.
        seg_loss_sum = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        for i, sample in enumerate(tbar):
            itr = epoch * len(self.train_loader) + i
            if self.visdom:
                self.vis.line(
                    X=torch.tensor([itr]),
                    Y=torch.tensor([self.optimizer.param_groups[0]['lr']]),
                    win='lr',
                    opts=dict(title='lr', xlabel='iter', ylabel='lr'),
                    update='append' if itr > 0 else None)
            A_image, A_target = sample['image'], sample['label']
            if self.args.cuda:
                A_image, A_target = A_image.cuda(), A_target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred_source, 0)
            A_output, A_feat, A_low_feat = self.model(A_image)
            self.optimizer.zero_grad()
            # Supervised loss
            seg_loss = self.criterion(A_output, A_target)
            loss = seg_loss
            loss.backward()
            self.optimizer.step()
            train_loss += seg_loss.item()
            seg_loss_sum += seg_loss.item()
            self.summary.writer.add_scalar('Train/Loss', loss.item(), itr)
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config.batch_size + A_image.data.shape[0]))
        print('Seg Loss: %.3f' % train_loss)
        if self.visdom:
            self.vis.line(X=torch.tensor([epoch]),
                          Y=torch.tensor([seg_loss_sum]),
                          win='train_loss',
                          name='Seg_loss',
                          opts=dict(title='loss',
                                    xlabel='epoch',
                                    ylabel='loss'),
                          update='append' if epoch > 0 else None)

    def validation(self, epoch):
        """Validate on the source split; save weights when Building IoU improves."""

        def get_metrics(tbar, if_source=False):
            # Evaluate one loader: returns (Building Acc, Building IoU, mIoU).
            self.evaluator.reset()
            test_loss = 0.0
            for i, sample in enumerate(tbar):
                image, target = sample['image'], sample['label']
                if self.args.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    output, low_feat, feat = self.model(image)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
                pred = output.data.cpu().numpy()
                target = target.cpu().numpy()
                pred = np.argmax(pred, axis=1)
                # Add batch sample into evaluator
                self.evaluator.add_batch(target, pred)
            self.summary.writer.add_scalar('Val/Loss', test_loss / (i + 1),
                                           epoch)
            # Fast test during the training
            Acc = self.evaluator.Building_Acc()
            IoU = self.evaluator.Building_IoU()
            mIoU = self.evaluator.Mean_Intersection_over_Union()
            if if_source:
                print('Validation on source:')
            else:
                print('Validation on target:')
            print('[Epoch: %d, numImages: %5d]' %
                  (epoch, i * self.config.batch_size + image.data.shape[0]))
            print("Acc:{}, IoU:{}, mIoU:{}".format(Acc, IoU, mIoU))
            print('Loss: %.3f' % test_loss)
            if if_source:
                names = ['source', 'source_acc', 'source_IoU', 'source_mIoU']
                self.summary.writer.add_scalar('Val/SourceAcc', Acc, epoch)
                self.summary.writer.add_scalar('Val/SourceIoU', IoU, epoch)
            else:
                names = ['target', 'target_acc', 'target_IoU', 'target_mIoU']
                self.summary.writer.add_scalar('Val/TargetAcc', Acc, epoch)
                self.summary.writer.add_scalar('Val/TargetIoU', IoU, epoch)
            # Draw Visdom
            if self.visdom:
                self.vis.line(X=torch.tensor([epoch]),
                              Y=torch.tensor([test_loss]),
                              win='val_loss',
                              name=names[0],
                              update='append')
                self.vis.line(X=torch.tensor([epoch]),
                              Y=torch.tensor([Acc]),
                              win='metrics',
                              name=names[1],
                              opts=dict(title='metrics',
                                        xlabel='epoch',
                                        ylabel='performance'),
                              update='append' if epoch > 0 else None)
                self.vis.line(X=torch.tensor([epoch]),
                              Y=torch.tensor([IoU]),
                              win='metrics',
                              name=names[2],
                              update='append')
                self.vis.line(X=torch.tensor([epoch]),
                              Y=torch.tensor([mIoU]),
                              win='metrics',
                              name=names[3],
                              update='append')
            return Acc, IoU, mIoU

        self.model.eval()
        tbar_source = tqdm(self.val_loader, desc='\r')
        s_acc, s_iou, s_miou = get_metrics(tbar_source, True)
        new_pred_source = s_iou
        if new_pred_source > self.best_pred_source:
            is_best = True
            self.best_pred_source = max(new_pred_source,
                                        self.best_pred_source)
            print('Saving state, epoch:', epoch)
            torch.save(
                self.model.module.state_dict(),
                self.args.save_folder + 'models/' + 'epoch' + str(epoch) +
                '.pth')
        loss_file = {'s_Acc': s_acc, 's_IoU': s_iou, 's_mIoU': s_miou}
        with open(
                os.path.join(self.args.save_folder, 'eval',
                             'epoch' + str(epoch) + '.json'), 'w') as f:
            json.dump(loss_file, f)
class Trainer(object):
    """Adversarial domain-adaptation trainer.

    A DeepLab segmentation network is trained with supervised loss on the
    source domain while a small discriminator, fed with entropy maps of the
    softmax output, provides an adversarial signal that aligns the target
    domain with the source (AdvEnt-style training).
    """

    def __init__(self, config, args):
        self.args = args
        self.config = config
        self.visdom = args.visdom
        if args.visdom:
            self.vis = visdom.Visdom(env=os.getcwd().split('/')[-1], port=8888)

        # Dataloaders: labelled source domain and (unlabelled) target domain.
        self.train_loader, self.val_loader, self.test_loader, self.nclass = \
            make_data_loader(config)
        self.target_train_loader, self.target_val_loader, self.target_test_loader, _ = \
            make_target_data_loader(config)

        # Segmentation network and domain discriminator.
        self.model = DeepLab(num_classes=self.nclass,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=config.freeze_bn)
        self.D = Discriminator(num_classes=self.nclass, ndf=16)

        # Backbone at the base lr, decoder at a scaled lr.
        train_params = [{'params': self.model.get_1x_lr_params(),
                         'lr': config.lr},
                        {'params': self.model.get_10x_lr_params(),
                         'lr': config.lr * config.lr_ratio}]

        self.optimizer = torch.optim.SGD(train_params,
                                         momentum=config.momentum,
                                         weight_decay=config.weight_decay)
        self.D_optimizer = torch.optim.Adam(self.D.parameters(),
                                            lr=config.lr,
                                            betas=(0.9, 0.99))

        # Criterion (no class-balanced weights).
        self.criterion = SegmentationLosses(
            weight=None, cuda=args.cuda).build_loss(mode=config.loss)
        self.entropy_mini_loss = MinimizeEntropyLoss()
        self.bottleneck_loss = BottleneckLoss()
        self.instance_loss = InstanceLoss()

        self.evaluator = Evaluator(self.nclass)
        self.scheduler = LR_Scheduler(config.lr_scheduler, config.lr,
                                      config.epochs, len(self.train_loader),
                                      config.lr_step, config.warmup_epochs)
        self.summary = TensorboardSummary('./train_log')

        # Domain labels used by the adversarial BCE loss.
        self.source_label = 0
        self.target_label = 1

        if args.cuda:
            self.model = torch.nn.DataParallel(self.model)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()
            self.D = torch.nn.DataParallel(self.D)
            patch_replication_callback(self.D)
            self.D = self.D.cuda()

        self.best_pred_source = 0.0
        self.best_pred_target = 0.0

        # Resuming checkpoint
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            # BUG FIX: load_state_dict() has no `map_location` keyword; the
            # device remap belongs to torch.load().
            if args.cuda:
                checkpoint = torch.load(args.resume)
                self.model.module.load_state_dict(checkpoint)
            else:
                checkpoint = torch.load(args.resume,
                                        map_location=torch.device('cpu'))
                self.model.load_state_dict(checkpoint)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, args.start_epoch))

    def training(self, epoch):
        """Run one epoch of joint segmentation + adversarial training."""
        train_loss = 0.0
        seg_loss_sum, bn_loss_sum, entropy_loss_sum = 0.0, 0.0, 0.0
        adv_loss_sum, d_loss_sum, ins_loss_sum = 0.0, 0.0, 0.0
        self.model.train()
        # BUG FIX: was the bare (global) name `config`; the trainer's
        # configuration is stored on self.
        if self.config.freeze_bn:
            self.model.module.freeze_bn()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        target_train_iterator = iter(self.target_train_loader)
        for i, sample in enumerate(tbar):
            itr = epoch * len(self.train_loader) + i
            self.summary.writer.add_scalar(
                'Train/lr', self.optimizer.param_groups[0]['lr'], itr)
            A_image, A_target = sample['image'], sample['label']

            # Get one batch from target domain; restart the iterator when the
            # (typically shorter) target loader is exhausted.
            try:
                target_sample = next(target_train_iterator)
            except StopIteration:
                target_train_iterator = iter(self.target_train_loader)
                target_sample = next(target_train_iterator)
            B_image, B_target, B_image_pair = (target_sample['image'],
                                               target_sample['label'],
                                               target_sample['image_pair'])

            if self.args.cuda:
                A_image, A_target = A_image.cuda(), A_target.cuda()
                B_image, B_target, B_image_pair = (B_image.cuda(),
                                                   B_target.cuda(),
                                                   B_image_pair.cuda())

            self.scheduler(self.optimizer, i, epoch, self.best_pred_source,
                           self.best_pred_target, self.config.lr_ratio)
            self.scheduler(self.D_optimizer, i, epoch, self.best_pred_source,
                           self.best_pred_target, self.config.lr_ratio)

            A_output, A_feat, A_low_feat = self.model(A_image)
            B_output, B_feat, B_low_feat = self.model(B_image)

            self.optimizer.zero_grad()
            self.D_optimizer.zero_grad()

            # ---- Train the segmentation network (discriminator frozen) ----
            for param in self.D.parameters():
                param.requires_grad = False

            # Supervised loss on the source domain.
            seg_loss = self.criterion(A_output, A_target)
            main_loss = seg_loss

            # Adversarial loss: push D to label target entropy maps as source.
            D_out = self.D(prob_2_entropy(F.softmax(B_output)))
            adv_loss = bce_loss(D_out, self.source_label)
            main_loss += self.config.lambda_adv * adv_loss
            main_loss.backward()

            # ---- Train the discriminator on detached predictions ----
            for param in self.D.parameters():
                param.requires_grad = True
            A_output_detach = A_output.detach()
            B_output_detach = B_output.detach()
            # source
            D_source = self.D(prob_2_entropy(F.softmax(A_output_detach)))
            source_loss = bce_loss(D_source, self.source_label) / 2
            # target
            D_target = self.D(prob_2_entropy(F.softmax(B_output_detach)))
            target_loss = bce_loss(D_target, self.target_label) / 2
            d_loss = source_loss + target_loss
            d_loss.backward()

            self.optimizer.step()
            self.D_optimizer.step()

            seg_loss_sum += seg_loss.item()
            adv_loss_sum += self.config.lambda_adv * adv_loss.item()
            d_loss_sum += d_loss.item()
            train_loss += seg_loss.item()

            self.summary.writer.add_scalar('Train/SegLoss', seg_loss.item(), itr)
            self.summary.writer.add_scalar('Train/AdvLoss', adv_loss.item(), itr)
            self.summary.writer.add_scalar('Train/DiscriminatorLoss',
                                           d_loss.item(), itr)
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))

        # Show the results of the last iteration of the epoch.
        print("Add Train images at epoch" + str(epoch))
        self.summary.visualize_image('Train-Source', self.config.dataset,
                                     A_image, A_target, A_output, epoch, 5)
        self.summary.visualize_image('Train-Target', self.config.target,
                                     B_image, B_target, B_output, epoch, 5)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config.batch_size + A_image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

    def validation(self, epoch):
        """Evaluate on source and target validation sets; checkpoint on improvement."""

        def get_metrics(tbar, if_source=False):
            # Run the model over one loader, returning (Acc, IoU, mIoU).
            self.evaluator.reset()
            test_loss = 0.0
            for i, sample in enumerate(tbar):
                image, target = sample['image'], sample['label']
                if self.args.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    # NOTE(review): the unpack order here (output, low_feat,
                    # feat) differs from training() (output, feat, low_feat) --
                    # confirm against the model's return signature.
                    output, low_feat, feat = self.model(image)
                loss = self.criterion(output, target)
                test_loss += loss.item()
                tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
                pred = output.data.cpu().numpy()
                target_ = target.cpu().numpy()
                pred = np.argmax(pred, axis=1)
                # Add batch sample into evaluator
                self.evaluator.add_batch(target_, pred)

            # Visualize the last validation batch.
            if if_source:
                print("Add Validation-Source images at epoch" + str(epoch))
                self.summary.visualize_image('Val-Source', self.config.dataset,
                                             image, target, output, epoch, 5)
            else:
                print("Add Validation-Target images at epoch" + str(epoch))
                self.summary.visualize_image('Val-Target', self.config.target,
                                             image, target, output, epoch, 5)

            # Fast test during the training
            Acc = self.evaluator.Building_Acc()
            IoU = self.evaluator.Building_IoU()
            mIoU = self.evaluator.Mean_Intersection_over_Union()

            if if_source:
                print('Validation on source:')
            else:
                print('Validation on target:')
            print('[Epoch: %d, numImages: %5d]' %
                  (epoch, i * self.config.batch_size + image.data.shape[0]))
            print("Acc:{}, IoU:{}, mIoU:{}".format(Acc, IoU, mIoU))
            print('Loss: %.3f' % test_loss)

            if if_source:
                self.summary.writer.add_scalar('Val/SourceAcc', Acc, epoch)
                self.summary.writer.add_scalar('Val/SourceIoU', IoU, epoch)
            else:
                self.summary.writer.add_scalar('Val/TargetAcc', Acc, epoch)
                self.summary.writer.add_scalar('Val/TargetIoU', IoU, epoch)
            return Acc, IoU, mIoU

        self.model.eval()
        tbar_source = tqdm(self.val_loader, desc='\r')
        tbar_target = tqdm(self.target_val_loader, desc='\r')
        s_acc, s_iou, s_miou = get_metrics(tbar_source, True)
        t_acc, t_iou, t_miou = get_metrics(tbar_target, False)

        # Building IoU is the model-selection metric on both domains.
        new_pred_source = s_iou
        new_pred_target = t_iou
        if (new_pred_source > self.best_pred_source
                or new_pred_target > self.best_pred_target):
            is_best = True
            self.best_pred_source = max(new_pred_source, self.best_pred_source)
            self.best_pred_target = max(new_pred_target, self.best_pred_target)
            print('Saving state, epoch:', epoch)
            # Unwrap DataParallel when present so the checkpoint also loads
            # in CPU-only runs (self.model is only wrapped under args.cuda).
            state_dict = (self.model.module.state_dict()
                          if hasattr(self.model, 'module')
                          else self.model.state_dict())
            torch.save(state_dict,
                       self.args.save_folder + 'models/' + 'epoch' + str(epoch) + '.pth')

        loss_file = {
            's_Acc': s_acc,
            's_IoU': s_iou,
            's_mIoU': s_miou,
            't_Acc': t_acc,
            't_IoU': t_iou,
            't_mIoU': t_miou
        }
        with open(
                os.path.join(self.args.save_folder, 'eval',
                             'epoch' + str(epoch) + '.json'), 'w') as f:
            json.dump(loss_file, f)
class Trainer(object):
    """FPN semantic-segmentation trainer for the CamVid and Cityscapes datasets."""

    def __init__(self, args):
        self.args = args
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        if args.dataset == 'CamVid':
            size = 512
            # NOTE(review): backslash-joined paths are Windows-only -- confirm
            # the intended platform before porting.
            train_file = os.path.join(os.getcwd() + "\\data\\CamVid", "train.csv")
            val_file = os.path.join(os.getcwd() + "\\data\\CamVid", "val.csv")
            print('=>loading datasets')
            train_data = CamVidDataset(csv_file=train_file, phase='train')
            self.train_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.num_workers)
            val_data = CamVidDataset(csv_file=val_file, phase='val', flip_rate=0)
            self.val_loader = torch.utils.data.DataLoader(
                val_data,
                batch_size=args.batch_size,
                shuffle=True,
                num_workers=args.num_workers)
            self.num_class = 32
        elif args.dataset == 'Cityscapes':
            kwargs = {'num_workers': args.num_workers, 'pin_memory': True}
            self.train_loader, self.val_loader, self.test_loader, self.num_class = \
                make_data_loader(args, **kwargs)

        # Define network (only a ResNet-101 backbone is wired up).
        if args.net == 'resnet101':
            blocks = [2, 4, 23, 3]
            fpn = FPN(blocks, self.num_class, back_bone=args.net)

        # Define Optimizer
        self.lr = self.args.lr
        if args.optimizer == 'adam':
            self.lr = self.lr * 0.1
            # BUG FIX: torch.optim.Adam has no `momentum` keyword (passing it
            # raised a TypeError), and the 0.1-scaled rate stored in self.lr
            # was computed but never handed to the optimizer.
            optimizer = torch.optim.Adam(fpn.parameters(), lr=self.lr,
                                         weight_decay=args.weight_decay)
        elif args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(fpn.parameters(), lr=args.lr,
                                        momentum=0,
                                        weight_decay=args.weight_decay)

        # Define Criterion
        if args.dataset == 'CamVid':
            self.criterion = nn.CrossEntropyLoss()
        elif args.dataset == 'Cityscapes':
            weight = None
            self.criterion = SegmentationLosses(
                weight=weight, cuda=args.cuda).build_loss(mode='ce')

        self.model = fpn
        self.optimizer = optimizer
        # Define Evaluator
        self.evaluator = Evaluator(self.num_class)

        # multiple GPUs
        if args.mGPUs:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
        # Using cuda
        if args.cuda:
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume:
            output_dir = os.path.join(args.save_dir, args.dataset, args.checkname)
            runs = sorted(glob.glob(os.path.join(output_dir, 'experiment_*')))
            run_id = int(runs[-1].split('_')[-1]) - 1 if runs else 0
            experiment_dir = os.path.join(output_dir,
                                          'experiment_{}'.format(str(run_id)))
            load_name = os.path.join(experiment_dir, 'checkpoint.pth.tar')
            if not os.path.isfile(load_name):
                raise RuntimeError("=> no checkpoint found at '{}'".format(load_name))
            # BUG FIX: the cuda/cpu branches used to be identical; a GPU
            # checkpoint must be remapped onto the CPU when CUDA is absent.
            if args.cuda:
                checkpoint = torch.load(load_name)
            else:
                checkpoint = torch.load(load_name,
                                        map_location=torch.device('cpu'))
            args.start_epoch = checkpoint['epoch']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            self.lr = checkpoint['optimizer']['param_groups'][0]['lr']
            print("=> loaded checkpoint '{}'(epoch {})".format(
                load_name, checkpoint['epoch']))

        # Step-decay schedule: decay once upon reaching each of these epochs.
        self.lr_stage = [68, 93]
        self.lr_staget_ind = 0

    def training(self, epoch):
        """Run one training epoch; logs per-iteration loss to tensorboard."""
        train_loss = 0.0
        self.model.train()
        num_img_tr = len(self.train_loader)
        # BUG FIX: the guard used to be `self.lr_staget_ind > 1`, which can
        # never be true for an index that starts at 0 and only increments
        # here, so the decay never fired (and would have indexed past
        # lr_stage if it had).
        if (self.lr_staget_ind < len(self.lr_stage)
                and epoch == self.lr_stage[self.lr_staget_ind]):
            adjust_learning_rate(self.optimizer, self.args.lr_decay_gamma)
            self.lr *= self.args.lr_decay_gamma
            self.lr_staget_ind += 1
        for iteration, batch in enumerate(self.train_loader):
            if self.args.dataset == 'CamVid':
                image, target = batch['X'], batch['l']
            elif self.args.dataset == 'Cityscapes':
                image, target = batch['image'], batch['label']
            else:
                raise NotImplementedError
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.optimizer.zero_grad()
            inputs = Variable(image)
            labels = Variable(target)
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels.long())
            # The criterion reduces to a scalar, so a plain backward suffices
            # (the old gradient argument torch.ones_like(loss) was a no-op).
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            if iteration % 10 == 0:
                print("Epoch[{}]({}/{}):Loss:{:.4f}, learning rate={}".format(
                    epoch, iteration, len(self.train_loader), loss.data, self.lr))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   iteration + num_img_tr * epoch)
        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, iteration * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            # Unwrap DataParallel only when it is actually in use; single-GPU
            # runs have no `.module` attribute.
            state_dict = (self.model.module.state_dict()
                          if hasattr(self.model, 'module')
                          else self.model.state_dict())
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': state_dict,
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)

    def validation(self, epoch):
        """Evaluate on the validation set and checkpoint on a new best mIoU."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        # Enumerate the tqdm wrapper (not the raw loader) so the progress bar
        # actually advances; also avoids shadowing the builtin `iter`.
        for i, batch in enumerate(tbar):
            if self.args.dataset == 'CamVid':
                image, target = batch['X'], batch['l']
            elif self.args.dataset == 'Cityscapes':
                image, target = batch['image'], batch['label']
            else:
                raise NotImplementedError
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            # NOTE(review): training casts the target with .long() but this
            # call does not -- confirm the loader's label dtype.
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f ' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/FWIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.shape[0]))
        print("Acc:{:.5f}, Acc_class:{:.5f}, mIoU:{:.5f}, fwIoU:{:.5f}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            # Save the unwrapped weights so single- and multi-GPU checkpoints
            # share the same key layout (training() does the same).
            state_dict = (self.model.module.state_dict()
                          if hasattr(self.model, 'module')
                          else self.model.state_dict())
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': state_dict,
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)
class Test:
    """Evaluate a trained DeepLab model on a source city and a set of target
    cities, optionally re-estimating BatchNorm statistics on each target's
    training split before testing ("BN adaptation")."""

    def __init__(self, model_path, config, bn, save_path, save_batch, cuda=False):
        self.bn = bn
        self.target = config.all_dataset
        self.target.remove(config.dataset)
        # load source domain
        self.source_set = spacenet.Spacenet(city=config.dataset,
                                            split='test',
                                            img_root=config.img_root,
                                            source_dist=dist[config.dataset])
        self.source_loader = DataLoader(self.source_set, batch_size=16,
                                        shuffle=False, num_workers=2)
        self.save_path = save_path
        self.save_batch = save_batch
        self.target_set = []
        self.target_loader = []
        self.target_trainset = []
        self.target_trainloader = []
        self.config = config

        # load other domains: a test loader for evaluation and a train loader
        # for BN adaptation, per target city.
        for city in self.target:
            test = spacenet.Spacenet(city=city, split='test',
                                     img_root=config.img_root,
                                     source_dist=dist[city])
            self.target_set.append(test)
            self.target_loader.append(
                DataLoader(test, batch_size=16, shuffle=False, num_workers=2))
            train = spacenet.Spacenet(city=city, split='train',
                                      img_root=config.img_root,
                                      source_dist=dist[city])
            self.target_trainset.append(train)
            self.target_trainloader.append(
                DataLoader(train, batch_size=16, shuffle=False, num_workers=2))

        self.model = DeepLab(num_classes=2,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=False)
        if cuda:
            self.checkpoint = torch.load(model_path)
        else:
            self.checkpoint = torch.load(model_path,
                                         map_location=torch.device('cpu'))
        self.model.load_state_dict(self.checkpoint)
        self.evaluator = Evaluator(2)
        self.cuda = cuda
        if cuda:
            self.model = self.model.cuda()

    def get_performance(self, dataloader, trainloader, city):
        """Evaluate on `dataloader`, returning (Building Acc, Building IoU, mIoU).

        If BN adaptation is enabled and `city` is not the source city, the
        model is first run in train mode over `trainloader` so the BatchNorm
        running statistics adapt to the target domain.
        """
        # change mean and var of bn to adapt to the target domain
        if self.bn and city != self.config.dataset:
            print('BN Adaptation on ' + city)
            self.model.train()
            for sample in trainloader:
                image, target = sample['image'], sample['label']
                if self.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    output = self.model(image)
                if len(output) > 1:
                    output = output[0]

        batch = self.save_batch
        if batch < 0:
            # A negative save_batch means "save every batch".
            batch = len(dataloader)

        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(dataloader, desc='\r')

        # save in different directories depending on BN adaptation
        if self.bn:
            save_path = os.path.join(self.save_path, city + '_bn')
        else:
            save_path = os.path.join(self.save_path, city)

        # evaluate on the test dataset
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            # NOTE(review): if the model returns a plain tensor, len(output)
            # is the batch size and this keeps only the first element --
            # confirm the model returns an (output, ...) tuple here.
            if len(output) > 1:
                output = output[0]
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

            # save pictures for the first `save_batch` batches
            if batch > 0:
                # makedirs creates self.save_path too and is race-free,
                # unlike the old exists()+mkdir() pair.
                os.makedirs(save_path, exist_ok=True)
                image = image.cpu().numpy() * 255
                image = image.transpose(0, 2, 3, 1).astype(int)
                imgs = self.color_images(pred, target)
                self.save_images(imgs, batch, save_path, False)
                self.save_images(image, batch, save_path, True)
                batch -= 1

        Acc = self.evaluator.Building_Acc()
        IoU = self.evaluator.Building_IoU()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        return Acc, IoU, mIoU

    def test(self):
        """Run evaluation on the source city and every target city; dump JSON."""
        A, I, Im = self.get_performance(self.source_loader, None,
                                        self.config.dataset)
        tA, tI, tIm = [], [], []
        for dl, tl, city in zip(self.target_loader, self.target_trainloader,
                                self.target):
            tA_, tI_, tIm_ = self.get_performance(dl, tl, city)
            tA.append(tA_)
            tI.append(tI_)
            tIm.append(tIm_)

        res = {}
        print("Test for source domain:")
        print("{}: Acc:{}, IoU:{}, mIoU:{}".format(self.config.dataset, A, I, Im))
        # BUG FIX: used the bare (global) name `config`; the configuration is
        # stored on self.
        res[self.config.dataset] = {'Acc': A, 'IoU': I, 'mIoU': Im}
        print('Test for target domain:')
        for i, city in enumerate(self.target):
            print("{}: Acc:{}, IoU:{}, mIoU:{}".format(city, tA[i], tI[i], tIm[i]))
            res[city] = {'Acc': tA[i], 'IoU': tI[i], 'mIoU': tIm[i]}

        if self.bn:
            name = 'train_log/test_bn.json'
        else:
            name = 'train_log/test.json'
        with open(name, 'w') as f:
            json.dump(res, f)

    def save_images(self, imgs, batch_index, save_path, if_original=False):
        """Write a batch of images under save_path as BGR JPEGs."""
        for i, img in enumerate(imgs):
            img = img[:, :, ::-1]  # change to BGR for cv2
            suffix = '_Original.jpg' if if_original else '_Pred.jpg'
            cv2.imwrite(os.path.join(save_path,
                                     str(batch_index) + str(i) + suffix), img)

    def color_images(self, pred, target):
        """Color-code prediction vs. ground truth per pixel.

        Black = true negative, red = false negative, green = false positive,
        yellow = true positive (building class is positive).
        """
        imgs = []
        for p, t in zip(pred, target):
            # Encode the 2x2 confusion outcome into 0..3.
            tmp = p * 2 + t
            img = np.zeros((p.shape[0], p.shape[1], 3))
            img[np.where(tmp == 0)] = [0, 0, 0]      # true negative
            img[np.where(tmp == 1)] = [255, 0, 0]    # false negative
            img[np.where(tmp == 2)] = [0, 255, 0]    # false positive
            img[np.where(tmp == 3)] = [255, 255, 0]  # true positive
            imgs.append(img)
        return imgs
class Trainer(object):
    """DeepLab trainer whose loss is scaled per sample by a weight carried in
    each batch (sample['weight']); model selection uses xy_mIoU."""

    def __init__(self, args):
        self.args = args
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = \
            make_data_loader(args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

        # Optional class-balanced weights, computed once and cached on disk.
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset)[0],
                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset,
                                                  self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            # NOTE(review): a missing checkpoint file is silently ignored
            # here (no RuntimeError as in the sibling trainers) -- confirm
            # that is intentional.
            if os.path.isfile(args.resume):
                checkpoint = torch.load(args.resume,
                                        map_location=torch.device('cpu'))
                args.start_epoch = checkpoint['epoch']
                if args.cuda:
                    self.model.module.load_state_dict(checkpoint['state_dict'])
                else:
                    self.model.load_state_dict(checkpoint['state_dict'])
                if not args.ft:
                    self.optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Run one epoch of per-sample-weighted training."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        for i, sample in enumerate(tbar):
            image, target, weight = sample['image'], sample['label'], sample['weight']
            if self.args.cuda:
                image, target, weight = image.cuda(), target.cuda(), weight.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            # Weighted sum of per-sample losses: weight[index, 0, 0] scales
            # the criterion value of that sample.
            loss = 0
            for index in range(output.shape[0]):
                temp1 = output[index].unsqueeze(0)
                temp2 = target[index].unsqueeze(0)
                loss = loss + weight[index, 0, 0] * self.criterion(temp1, temp2)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

    def validation(self, epoch):
        """Evaluate on the validation set; checkpoint when xy_mIoU improves."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        xy_mIoU = self.evaluator.xy_Mean_Intersection_over_Union()
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.test_batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print("min_mIoU{}".format(xy_mIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = xy_mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            # BUG FIX: self.model.module only exists under DataParallel; the
            # CPU path supported by the resume code above would have crashed
            # here with an unconditional `.module` access.
            state_dict = (self.model.module.state_dict()
                          if hasattr(self.model, 'module')
                          else self.model.state_dict())
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': state_dict,
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)
class Test:
    """Debug/analysis variant of the cross-city evaluator: besides computing
    metrics, it hooks intermediate ReLU6 activations, dumps per-layer pixel
    histograms and neighbouring-pixel correlations to disk."""

    def __init__(self, model_path, config, bn, save_path, save_batch, cuda=False):
        self.bn = bn
        self.target = config.all_dataset
        self.target.remove(config.dataset)
        # load source domain
        self.source_set = spacenet.Spacenet(city=config.dataset, split='val',
                                            img_root=config.img_root)
        self.source_loader = DataLoader(self.source_set, batch_size=16,
                                        shuffle=False, num_workers=2)
        self.save_path = save_path
        self.save_batch = save_batch
        self.target_set = []
        self.target_loader = []
        self.target_trainset = []
        self.target_trainloader = []
        self.config = config

        # load other domains
        for city in self.target:
            test = spacenet.Spacenet(city=city, split='val',
                                     img_root=config.img_root)
            self.target_set.append(test)
            self.target_loader.append(
                DataLoader(test, batch_size=16, shuffle=False, num_workers=2))
            train = spacenet.Spacenet(city=city, split='train',
                                      img_root=config.img_root)
            self.target_trainset.append(train)
            self.target_trainloader.append(
                DataLoader(train, batch_size=16, shuffle=False, num_workers=2))

        self.model = DeepLab(num_classes=2,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=config.freeze_bn)
        if cuda:
            self.checkpoint = torch.load(model_path)
        else:
            self.checkpoint = torch.load(model_path,
                                         map_location=torch.device('cpu'))
        self.model.load_state_dict(self.checkpoint)
        self.evaluator = Evaluator(2)
        self.cuda = cuda
        if cuda:
            self.model = self.model.cuda()

    # NOTE(review): written as a forward hook (module, input, output) but
    # declared inside the class without `self`, and it reads the module-level
    # globals `activation` and `i`; the code below registers the global
    # `save_output2` instead -- confirm whether this method is still used.
    def save_output(module, input, output):
        global activation, i
        # save output
        print('I came here')
        channels = output.permute(1, 0, 2, 3)
        c = channels.shape[0]
        features = channels.reshape(c, -1)
        if len(activation) == i:
            activation.append(features)
        else:
            activation[i] = torch.cat([activation[i], features], dim=1)
        i += 1
        return

    def get_performance(self, dataloader, trainloader, city):
        """Evaluate on `dataloader`, dumping activation statistics on the way.

        Returns (Building Acc, Building IoU, mIoU) -- or three empty lists
        while the histogram-plotting short-circuit below is active.
        """
        # DEBUG short-circuit (left enabled in the original): only plot the
        # previously saved per-layer pixel histograms, then bail out.
        if 1:
            pix = torch.load("pix" + self.config.dataset + "_" + city + ".pt")
            for k in range(0, len(pix)):
                fig = plt.figure()
                plt.hist(pix[k])
                plt.xlabel('Activation values')
                plt.ylabel("Count")
                fig.savefig('./train_log/figs/pix_' + self.config.dataset +
                            '_' + city + 'act' + str(k) + '.png')
            return [], [], []

        # change mean and var of bn to adapt to the target domain:
        # here by loading a domain-adapted checkpoint rather than re-running.
        if self.bn and city != self.config.dataset:
            self.checkpoint = torch.load('./train_log/' + self.config.dataset +
                                         '_da_' + city + '.pth')
            self.model.load_state_dict(self.checkpoint)
            if self.cuda:
                self.model = self.model.cuda()

        # Disabled alternative: re-estimate BN statistics on the fly.
        if 0:  # self.bn and city != self.config.dataset:
            print('BN Adaptation on' + city)
            self.model.train()
            tbar = tqdm(dataloader, desc='\r')
            for sample in trainloader:
                image, target = sample['image'], sample['label']
                if self.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    output = self.model(image)

        batch = self.save_batch
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(dataloader, desc='\r')

        # save in different directories
        if self.bn:
            save_path = os.path.join(self.save_path, city + '_bn')
        else:
            save_path = os.path.join(self.save_path, city)

        # Module-level state shared with the forward hooks.
        global randCh
        global first
        first = 1
        global ii
        ii = 0
        global ncity
        ncity = city
        randCh = torch.load('randCh.pt')

        # Hook every 5th ReLU6 of the source-city model to capture activations.
        layr = 0
        aa = 0
        if city == self.config.dataset:
            for hh in self.model.modules():
                if isinstance(hh, nn.ReLU6):
                    layr += 1
                    if layr % 5 == 0:
                        hh.register_forward_hook(save_output2)
                        aa += 1

        # evaluate on the test dataset
        pix = {}
        pix1 = {}
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)

            # Collect, per hooked layer k, the activations of each row and of
            # its right-shifted copy (pix vs pix1) for the neighbouring-pixel
            # correlation computed after the loop.
            for k in activation.keys():
                if first == 1:
                    pix[k] = []
                    pix1[k] = []
                for row in range(0, activation[k].shape[1]):
                    actkrow = activation[k][:, row, :-1].reshape(-1).cpu().numpy()
                    pix[k] = np.hstack((pix[k], actkrow))
                    actkrow1 = activation[k][:, row, 1:].reshape(-1).cpu().numpy()
                    pix1[k] = np.hstack((pix1[k], actkrow1))
                for bb in range(0, activation[k].size(0)):
                    cv2.imwrite(
                        os.path.join(
                            save_path, 'act' + str(k) + 'im' + str(i) + 'b' +
                            str(bb) + city + '.jpg'),
                        activation[k][bb, :].cpu().numpy() * 255)
            first += 1
            ii = 0

            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

            # save pictures for the first `save_batch` batches
            if batch > 0:
                if not os.path.exists(self.save_path):
                    os.mkdir(self.save_path)
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                image = image.cpu().numpy() * 255
                image = image.transpose(0, 2, 3, 1).astype(int)
                imgs = self.color_images(pred, target)
                self.save_images(imgs, batch, save_path, False)
                self.save_images(image, batch, save_path, True)
                batch -= 1

        # Correlation between horizontally adjacent activations, per layer.
        corrVal = {}
        for k in activation.keys():
            corrVal[k] = np.corrcoef(pix[k], pix1[k])[0, 1]
        torch.save(pix, "pix" + self.config.dataset + "_" + city + ".pt")
        print(corrVal)

        Acc = self.evaluator.Building_Acc()
        IoU = self.evaluator.Building_IoU()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        return Acc, IoU, mIoU

    def test(self):
        """Run evaluation on the source city and every target city; dump JSON."""
        A, I, Im = self.get_performance(self.source_loader, None,
                                        self.config.dataset)
        tA, tI, tIm = [], [], []
        for dl, tl, city in zip(self.target_loader, self.target_trainloader,
                                self.target):
            tA_, tI_, tIm_ = self.get_performance(dl, tl, city)
            tA.append(tA_)
            tI.append(tI_)
            tIm.append(tIm_)

        res = {}
        print("Test for source domain:")
        print("{}: Acc:{}, IoU:{}, mIoU:{}".format(self.config.dataset, A, I, Im))
        # BUG FIX: used the bare (global) name `config`; the configuration is
        # stored on self.
        res[self.config.dataset] = {'Acc': A, 'IoU': I, 'mIoU': Im}
        print('Test for target domain:')
        for i, city in enumerate(self.target):
            print("{}: Acc:{}, IoU:{}, mIoU:{}".format(city, tA[i], tI[i], tIm[i]))
            res[city] = {'Acc': tA[i], 'IoU': tI[i], 'mIoU': tIm[i]}

        if self.bn:
            name = 'train_log/test_bn.json'
        else:
            name = 'train_log/test.json'
        with open(name, 'w') as f:
            json.dump(res, f)

    def save_images(self, imgs, batch_index, save_path, if_original=False):
        """Write a batch of images under save_path as BGR JPEGs.

        BUG FIX: the condition used to be `if not if_original`, which saved
        original frames with the '_Pred' suffix and predictions with
        '_Original' (the sibling Test class has it the right way round).
        """
        for i, img in enumerate(imgs):
            img = img[:, :, ::-1]  # change to BGR
            if if_original:
                cv2.imwrite(
                    os.path.join(save_path,
                                 str(batch_index) + str(i) + '_Original.jpg'),
                    img)
            else:
                cv2.imwrite(
                    os.path.join(save_path,
                                 str(batch_index) + str(i) + '_Pred.jpg'),
                    img)

    def color_images(self, pred, target):
        """Color-code prediction vs. ground truth per pixel.

        Black = true negative, red = false negative, green = false positive,
        yellow = true positive (building class is positive).
        """
        imgs = []
        for p, t in zip(pred, target):
            # Encode the 2x2 confusion outcome into 0..3.
            tmp = p * 2 + t
            img = np.zeros((p.shape[0], p.shape[1], 3))
            img[np.where(tmp == 0)] = [0, 0, 0]      # true negative
            img[np.where(tmp == 1)] = [255, 0, 0]    # false negative
            img[np.where(tmp == 2)] = [0, 255, 0]    # false positive
            img[np.where(tmp == 3)] = [255, 255, 0]  # true positive
            imgs.append(img)
        return imgs
class Trainer(object):
    """DeepLab training/validation driver configured via a nested config dict."""

    def __init__(self, config):
        self.config = config
        self.best_pred = 0.0
        # Define Saver
        self.saver = Saver(config)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.config['training']['tensorboard']['log_dir'])
        self.writer = self.summary.create_summary()

        self.train_loader, self.val_loader, self.test_loader, self.nclass = initialize_data_loader(config)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=self.config['network']['backbone'],
                        output_stride=self.config['image']['out_stride'],
                        sync_bn=self.config['network']['sync_bn'],
                        freeze_bn=self.config['network']['freeze_bn'])

        # Backbone gets the base lr, the decoder/head 10x the base lr.
        train_params = [{'params': model.get_1x_lr_params(), 'lr': self.config['training']['lr']},
                        {'params': model.get_10x_lr_params(), 'lr': self.config['training']['lr'] * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=self.config['training']['momentum'],
                                    weight_decay=self.config['training']['weight_decay'],
                                    nesterov=self.config['training']['nesterov'])

        # Define Criterion; optionally use class-balanced weights
        if self.config['training']['use_balanced_weights']:
            classes_weights_path = os.path.join(
                self.config['dataset']['base_path'],
                self.config['dataset']['dataset_name'] + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(self.config,
                                                  self.config['dataset']['dataset_name'],
                                                  self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight,
            cuda=self.config['network']['use_cuda']).build_loss(mode=self.config['training']['loss_type'])
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(self.config['training']['lr_scheduler'],
                                      self.config['training']['lr'],
                                      self.config['training']['epochs'],
                                      len(self.train_loader))

        # Using cuda
        if self.config['network']['use_cuda']:
            self.model = torch.nn.DataParallel(self.model)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        if self.config['training']['weights_initialization']['use_pretrained_weights']:
            restore_from = self.config['training']['weights_initialization']['restore_from']
            if not os.path.isfile(restore_from):
                raise RuntimeError("=> no checkpoint found at '{}'".format(restore_from))
            if self.config['network']['use_cuda']:
                checkpoint = torch.load(restore_from)
            else:
                # IMPROVED: map_location='cpu' remaps tensors from every CUDA
                # device, not only 'cuda:0' as the old {'cuda:0': 'cpu'} did.
                checkpoint = torch.load(restore_from, map_location='cpu')
            self.config['training']['start_epoch'] = checkpoint['epoch']
            # BUGFIX: the cuda/cpu branches executed the identical statement;
            # a single unconditional call is equivalent.
            self.model.load_state_dict(checkpoint['state_dict'])
            # if not self.config['ft']:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(restore_from, checkpoint['epoch']))

    def training(self, epoch):
        """Run one training epoch: optimize, log per-iteration loss, save checkpoint."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # BUGFIX: `i % (num_img_tr // 10)` raised ZeroDivisionError whenever the
        # loader had fewer than 10 batches; clamp the interval to at least 1.
        vis_interval = max(1, num_img_tr // 10)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.config['network']['use_cuda']:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % vis_interval == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.config['dataset']['dataset_name'],
                                             image, target, output, global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config['training']['batch_size'] + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        # save last checkpoint
        self.saver.save_checkpoint({
            'epoch': epoch + 1,
            # 'state_dict': self.model.module.state_dict(),
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, is_best=False, filename='checkpoint_last.pth.tar')

        # if training on a subset reshuffle the data
        if self.config['training']['train_on_subset']['enabled']:
            self.train_loader.dataset.shuffle_dataset()

    def validation(self, epoch):
        """Evaluate on the validation split; keep the best-mIoU checkpoint."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.config['network']['use_cuda']:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Val loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config['training']['batch_size'] + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            self.best_pred = new_pred
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                # 'state_dict': self.model.module.state_dict(),
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best=True, filename='checkpoint_best.pth.tar')
class Trainer(object):
    """Standard DeepLab trainer driven by an argparse namespace."""

    def __init__(self, args):
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)

        # Backbone at base lr, decoder/head at 10x.
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                    weight_decay=args.weight_decay, nesterov=args.nesterov)

        # Define Criterion; optionally use class-balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                      args.epochs, len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                # DataParallel wraps the model, so the weights live on .module
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Run one training epoch and optionally checkpoint when no validation is run."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # BUGFIX: `i % (num_img_tr // 10)` raised ZeroDivisionError for loaders
        # with fewer than 10 batches; clamp the interval to at least 1.
        vis_interval = max(1, num_img_tr // 10)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % vis_interval == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output, global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)

    def validation(self, epoch):
        """Evaluate on the validation split; checkpoint when mIoU improves."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)
class Trainer(object):
    """DeepLab trainer variant whose dataloader yields 'trace' tensors as input."""

    def __init__(self, args):
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        print(self.nclass, args.backbone, args.out_stride, args.sync_bn,
              args.freeze_bn)  #2 resnet 16 False False

        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                    weight_decay=args.weight_decay, nesterov=args.nesterov)

        # Define Criterion; optionally use class-balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(Path.db_root_dir(args.dataset),
                                                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset, self.train_loader, self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                      args.epochs, len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            # load on CPU first so CUDA-saved checkpoints work everywhere
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Run one training epoch over the 'trace' inputs."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # BUGFIX: `i % (num_img_tr // 10)` raised ZeroDivisionError for loaders
        # with fewer than 10 batches; clamp the interval to at least 1.
        vis_interval = max(1, num_img_tr // 10)
        for i, sample in enumerate(tbar):
            #image, target = sample['image'], sample['label']
            image, target = sample['trace'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % vis_interval == 0:
                global_step = i + num_img_tr * epoch
                #self.summary.visualize_image(self.writer, self.args.dataset, image, target, output, global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)

    def validation(self, epoch):
        """Evaluate on the validation split; checkpoint when mIoU improves."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['trace'], sample['label']
            #image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
class Trainer(object):
    """DeepLab trainer that conditions on region proposals ('rp') and keeps a
    temporal-context buffer; metrics focus on instance detection for class 2."""

    def __init__(self, args):
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params, momentum=args.momentum,
                                    weight_decay=args.weight_decay, nesterov=args.nesterov)

        # Define Criterion: per-batch class weights are computed in training(),
        # so no build_loss() here.
        self.criterion = SegmentationLosses(cuda=args.cuda)
        self.model, self.optimizer = model, optimizer
        self.contexts = TemporalContexts(history_len=5)

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                      args.epochs, len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model, device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning or in validation/test mode
        if args.ft or args.mode == "val" or args.mode == "test":
            args.start_epoch = 0
            self.best_pred = 0.0

    def training(self, epoch):
        """Run one training epoch with per-batch class-balanced CE loss."""
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        # BUGFIX: `i % (num_img_tr // 20)` raised ZeroDivisionError for loaders
        # with fewer than 20 batches; clamp the interval to at least 1.
        plot_interval = max(1, num_img_tr // 20)
        for i, sample in enumerate(tbar):
            image, region_prop, target = sample['image'], sample['rp'], sample['label']
            if self.args.cuda:
                image, region_prop, target = image.cuda(), region_prop.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image, region_prop)
            loss = self.criterion.CrossEntropyLoss(
                output, target,
                weight=torch.from_numpy(
                    calculate_weights_batch(sample, self.nclass).astype(np.float32)))
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(), i + num_img_tr * epoch)

            pred = output.clone().data.cpu()
            pred_softmax = F.softmax(pred, dim=1).numpy()
            pred = np.argmax(pred.numpy(), axis=1)

            # Plot prediction every 20th iter
            if i % plot_interval == 0:
                global_step = i + num_img_tr * epoch
                self.summary.vis_grid(self.writer, self.args.dataset,
                                      image.data.cpu().numpy()[0],
                                      target.data.cpu().numpy()[0], pred[0],
                                      region_prop.data.cpu().numpy()[0],
                                      pred_softmax[0], global_step, split="Train")

        self.writer.add_scalar('train/total_loss_epoch', train_loss / num_img_tr, epoch)
        print('Loss: {}'.format(train_loss / num_img_tr))

        if self.args.no_val or self.args.save_all:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best, filename='checkpoint_' + str(epoch + 1) + '_.pth.tar')

    def validation(self, epoch):
        """Evaluate on val (or test) split and log detection metrics for class 2.

        Temporal context propagation is currently disabled (the related calls
        are intentionally left out); proposals are used exactly as loaded.
        """
        # BUGFIX: an unrecognized mode previously fell through and left
        # `loader` undefined (NameError below); fail fast instead.
        if self.args.mode == "train" or self.args.mode == "val":
            loader = self.val_loader
        elif self.args.mode == "test":
            loader = self.test_loader
        else:
            raise ValueError("Unknown mode: {}".format(self.args.mode))
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(loader, desc='\r')
        idr_thresholds = [0.20, 0.30, 0.40, 0.50, 0.60, 0.65]
        num_itr = len(loader)
        for i, sample in enumerate(tbar):
            image, region_prop, target = sample['image'], sample['rp'], sample['label']
            if self.args.cuda:
                image, region_prop, target = image.cuda(), region_prop.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image, region_prop)
            output = output.detach().data.cpu()
            pred_softmax = F.softmax(output, dim=1).numpy()
            pred = np.argmax(pred_softmax, axis=1)
            target = target.cpu().numpy()
            image = image.cpu().numpy()
            region_prop = region_prop.cpu().numpy()
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)
            global_step = i + num_itr * epoch
            self.summary.vis_grid(self.writer, self.args.dataset, image[0],
                                  target[0], pred[0], region_prop[0],
                                  pred_softmax[0], global_step, split="Validation")

        # Fast test during the training
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        recall, precision = self.evaluator.pdr_metric(class_id=2)
        idr_avg = np.array([self.evaluator.get_idr(class_value=2, threshold=value)
                            for value in idr_thresholds])
        false_idr = self.evaluator.get_false_idr(class_value=2)
        instance_iou = self.evaluator.get_instance_iou(threshold=0.20, class_value=2)

        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Recall/per_epoch', recall, epoch)
        self.writer.add_scalar('IDR/per_epoch(0.20)', idr_avg[0], epoch)
        self.writer.add_scalar('IDR/avg_epoch', np.mean(idr_avg), epoch)
        self.writer.add_scalar('False_IDR/epoch', false_idr, epoch)
        self.writer.add_scalar('Instance_IOU/epoch', instance_iou, epoch)
        self.writer.add_histogram(
            'Prediction_hist',
            self.evaluator.pred_labels[self.evaluator.gt_labels == 2], epoch)

        print('Validation:')
        print('IDR:{}'.format(idr_avg[0]))
        print('False Positive Rate: {}'.format(false_idr))
        print('Instance_IOU: {}'.format(instance_iou))

        if self.args.mode == "train":
            new_pred = mIoU
            if new_pred > self.best_pred:
                is_best = True
                self.best_pred = new_pred
                self.saver.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': self.model.module.state_dict(),
                        'optimizer': self.optimizer.state_dict(),
                        'best_pred': self.best_pred,
                    }, is_best)
class Trainer(object):
    """Adversarial trainer: a DeepLab segmentor trained jointly with an
    FCDiscriminator that tries to tell predictions from ground truth.
    Progress is plotted to visdom instead of tensorboard."""

    def __init__(self, config, args):
        self.args = args
        self.config = config
        self.vis = visdom.Visdom(env=os.getcwd().split('/')[-1])
        # Define Dataloader
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(config)
        self.gpu = args.gpu

        # labels for adversarial training
        self.gt_label = 0
        self.prediction_label = 1
        self.argmax = ArgMax()

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=config.backbone,
                        output_stride=config.out_stride,
                        sync_bn=config.sync_bn,
                        freeze_bn=config.freeze_bn)
        model_D = FCDiscriminator(num_classes=self.nclass)
        model_D.train()

        train_params = [{'params': model.get_1x_lr_params(), 'lr': config.lr},
                        {'params': model.get_10x_lr_params(), 'lr': config.lr * 10}]

        # Define Optimizers (SGD for the segmentor, Adam for the discriminator)
        optimizer = torch.optim.SGD(train_params, momentum=config.momentum,
                                    weight_decay=config.weight_decay)
        optimizer_D = torch.optim.Adam(model_D.parameters(), lr=1e-4, betas=(0.9, 0.99))
        optimizer_D.zero_grad()

        # Define Criteria
        self.criterion = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode=config.loss)
        self.criterion_D = torch.nn.BCEWithLogitsLoss()
        self.model, self.optimizer = model, optimizer
        self.model_D, self.optimizer_D = model_D, optimizer_D

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(config.lr_scheduler, config.lr, config.epochs,
                                      len(self.train_loader), config.lr_step,
                                      config.warmup_epochs)

        # Using cuda (DataParallel deliberately disabled in the original)
        if args.cuda:
            self.model = self.model.cuda()
            self.model_D = self.model_D.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            # BUGFIX: map_location is an argument of torch.load, not of
            # load_state_dict -- the original CPU branch raised a TypeError.
            if args.cuda:
                checkpoint = torch.load(args.resume)
            else:
                checkpoint = torch.load(args.resume, map_location='cpu')
            self.model.load_state_dict(checkpoint)
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, args.start_epoch))

    def training(self, epoch):
        """One adversarial epoch: update the segmentor (seg + adv loss),
        then compute discriminator losses on predictions and ground truth."""
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        seg_loss = 0.0
        adv_loss = 0.0
        D_loss = 0.0
        self.model.train()
        for i, sample in enumerate(tbar):
            iter = epoch * len(self.train_loader) + i
            self.vis.line(X=torch.tensor([iter]),
                          Y=torch.tensor([self.optimizer.param_groups[0]['lr']]),
                          win='lr_seg',
                          opts=dict(title='lr', xlabel='iter', ylabel='lr'),
                          update='append' if iter > 0 else None)
            self.vis.line(X=torch.tensor([iter]),
                          Y=torch.tensor([self.optimizer_D.param_groups[0]['lr']]),
                          win='lr_adv',
                          opts=dict(title='lr', xlabel='iter', ylabel='lr'),
                          update='append' if iter > 0 else None)
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            self.optimizer_D.zero_grad()

            # --- train the segmentation network ---
            # don't accumulate grads in D
            for param in self.model_D.parameters():
                param.requires_grad = False
            # seg loss
            output = self.model(image)
            loss1 = self.criterion(output, target)
            loss1.backward()
            self.optimizer.step()
            seg_loss += loss1.item()

            # adv loss: push D to label predictions as ground truth.
            # (BUGFIX: removed a leftover debug print of the argmax output.)
            D_out = self.model_D(self.argmax.apply(output))
            loss2 = self.criterion_D(
                D_out,
                Variable(torch.FloatTensor(D_out.data.size()).fill_(
                    self.gt_label)).cuda(self.gpu))
            loss2.backward()
            # NOTE(review): D's params have requires_grad=False here, so this
            # step only consumes grads accumulated elsewhere -- confirm intent.
            self.optimizer_D.step()
            adv_loss += loss2.item()

            # --- train the discriminator ---
            # bring back requires_grad
            for param in self.model_D.parameters():
                param.requires_grad = True
            # train with prediction
            output = output.detach()
            D_out1 = self.model_D(self.argmax.apply(output))
            loss_D1 = self.criterion_D(
                D_out1,
                Variable(torch.FloatTensor(D_out1.data.size()).fill_(
                    self.prediction_label)).cuda(self.gpu))
            loss_D1.backward()
            D_loss += loss_D1.data.cpu().numpy()
            # train with gt
            D_out2 = self.model_D(target)
            loss_D2 = self.criterion_D(
                D_out2,
                Variable(torch.FloatTensor(D_out2.data.size()).fill_(
                    self.gt_label)).cuda(self.gpu))
            loss_D2.backward()
            D_loss += loss_D2.data.cpu().numpy()
            tbar.set_description('[Train] Seg loss: %.3f, Adv loss: %.3f, D loss: %.3f' \
                % (seg_loss / (i + 1), adv_loss / (i + 1), D_loss / (i + 1)))

        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config.batch_size + image.data.shape[0]))
        print('Seg loss: %.3f, Adv loss: %.3f, D loss: %.3f' \
            % (seg_loss / (i + 1), adv_loss / (i + 1), D_loss / (i + 1)))
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([seg_loss]),
                      win='seg_loss', name='train',
                      opts=dict(title='loss', xlabel='epoch', ylabel='loss'),
                      update='append' if epoch > 0 else None)
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([adv_loss]),
                      win='adv_loss', name='train',
                      opts=dict(title='loss', xlabel='epoch', ylabel='loss'),
                      update='append' if epoch > 0 else None)
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([D_loss]),
                      win='D_loss', name='train',
                      opts=dict(title='loss', xlabel='epoch', ylabel='loss'),
                      update='append' if epoch > 0 else None)

    def validation(self, epoch):
        """Evaluate segmentation + adversarial losses; save best-mIoU weights."""
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        # BUGFIX: the original did `seg_loss += seg_loss.item()`, overwriting
        # the per-batch loss tensor with a float and never accumulating adv
        # loss at all; use dedicated running sums.
        seg_loss = 0.0
        adv_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            seg_loss += self.criterion(output, target).item()
            output = output.detach()
            # Consistent with training(): ArgMax is a torch.autograd.Function,
            # so it must be invoked through .apply, not called as an instance.
            D_out = self.model_D(self.argmax.apply(output))
            adv_loss += self.criterion_D(
                D_out,
                Variable(torch.FloatTensor(D_out.data.size()).fill_(
                    self.gt_label)).cuda(self.gpu)).item()
            tbar.set_description('[Test] Seg loss: %.3f, Adv loss: %.3f' %
                                 (seg_loss / (i + 1), adv_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Building_Acc()
        IoU = self.evaluator.Building_IoU()
        mIoU = self.evaluator.Mean_Intersection_over_Union()

        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.config.batch_size + image.data.shape[0]))
        print("Acc:{}, IoU:{}, mIoU:{}".format(Acc, IoU, mIoU))
        print('Seg loss: %.3f, Adv loss: %.3f' %
              (seg_loss / (i + 1), adv_loss / (i + 1)))
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([seg_loss]),
                      win='seg_loss', name='val', update='append')
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([adv_loss]),
                      win='adv_loss', name='val', update='append')
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([Acc]),
                      win='metrics', name='acc',
                      opts=dict(title='metrics', xlabel='epoch', ylabel='performance'),
                      update='append' if epoch > 0 else None)
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([IoU]),
                      win='metrics', name='IoU', update='append')
        self.vis.line(X=torch.tensor([epoch]), Y=torch.tensor([mIoU]),
                      win='metrics', name='mIoU', update='append')

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            print('Saving state, epoch:', epoch)
            torch.save(
                self.model.state_dict(),
                self.args.save_folder + 'models/' + 'epoch' + str(epoch) + '.pth')
        loss_file = {'Acc': Acc, 'IoU': IoU, 'mIoU': mIoU}
        with open(
                os.path.join(self.args.save_folder, 'eval',
                             'epoch' + str(epoch) + '.json'), 'w') as f:
            json.dump(loss_file, f)
class Trainer(object):
    """Segmentation trainer supporting deeplabv3+/deeplabv3/fpn backbones with
    selectable normalization (gn/bn/sync-bn/abn) and a `Vs` visualizer for
    writing prediction images.
    """

    def __init__(self, args):
        """Build dataloaders, the network, the loss, and optionally resume.

        Args:
            args: parsed command-line namespace (dataset, model, norm, cuda,
                  resume/ft flags, etc.).
        """
        self.args = args
        self.vs = Vs(args.dataset)
        # Define Dataloader
        kwargs = {"num_workers": args.workers, "pin_memory": True}
        (
            self.train_loader,
            self.val_loader,
            self.test_loader,
            self.nclass,
        ) = make_data_loader(args, **kwargs)
        # Select the normalization layer factory; abort on unknown setting.
        if self.args.norm == "gn":
            norm = gn
        elif self.args.norm == "bn":
            if self.args.sync_bn:
                norm = syncbn
            else:
                norm = bn
        elif self.args.norm == "abn":
            if self.args.sync_bn:
                norm = syncabn(self.args.gpu_ids)
            else:
                norm = abn
        else:
            print("Please check the norm.")
            exit()

        # Define network
        # NOTE(review): if args.model matches none of these, `model` stays
        # unbound and the assignment below raises NameError — confirm the CLI
        # restricts choices upstream.
        if self.args.model == "deeplabv3+":
            model = DeepLab(args=self.args,
                            num_classes=self.nclass,
                            freeze_bn=args.freeze_bn)
        elif self.args.model == "deeplabv3":
            model = DeepLabv3(
                Norm=args.norm,
                backbone=args.backbone,
                output_stride=args.out_stride,
                num_classes=self.nclass,
                freeze_bn=args.freeze_bn,
            )
        elif self.args.model == "fpn":
            model = FPN(args=args, num_classes=self.nclass)

        # Define Criterion
        # whether to use class balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset),
                args.dataset + "_classes_weights.npy")
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                # Compute (and cache) the weights on first use.
                weight = calculate_weigths_labels(args.dataset,
                                                  self.train_loader,
                                                  self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model = model

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint["epoch"]
            # Under DataParallel the real module sits at self.model.module.
            if args.cuda:
                self.model.module.load_state_dict(checkpoint["state_dict"])
            else:
                self.model.load_state_dict(checkpoint["state_dict"])
            self.best_pred = checkpoint["best_pred"]
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint["epoch"]))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def test(self):
        """Run inference on the test set and write predictions to disk via
        `self.vs` (id maps by default, color overlays when args.color is set).
        """
        self.model.eval()
        self.args.examine = False
        tbar = tqdm(self.test_loader, desc="\r")
        # Only pull images back to CPU when the color visualizer needs them.
        if self.args.color:
            __image = True
        else:
            __image = False
        for i, sample in enumerate(tbar):
            images = sample["image"]
            names = sample["name"]
            if self.args.cuda:
                images = images.cuda()
            with torch.no_grad():
                output = self.model(images)
            preds = output.data.cpu().numpy()
            preds = np.argmax(preds, axis=1)
            if __image:
                images = images.cpu().numpy()
            if not self.args.color:
                self.vs.predict_id(preds, names, self.args.save_dir)
            else:
                self.vs.predict_color(preds, images, names,
                                      self.args.save_dir)

    def validation(self, epoch):
        """Evaluate on the validation set, print metrics, and optionally dump
        id/color/examine visualizations per args flags.

        Args:
            epoch: current epoch index (only used for logging).
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc="\r")
        test_loss = 0.0
        if self.args.color or self.args.examine:
            __image = True
        else:
            __image = False
        for i, sample in enumerate(tbar):
            images, targets = sample["image"], sample["label"]
            names = sample["name"]
            if self.args.cuda:
                images, targets = images.cuda(), targets.cuda()
            with torch.no_grad():
                output = self.model(images)
            loss = self.criterion(output, targets)
            test_loss += loss.item()
            tbar.set_description("Test loss: %.3f" % (test_loss / (i + 1)))
            preds = output.data.cpu().numpy()
            targets = targets.cpu().numpy()
            preds = np.argmax(preds, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(targets, preds)
            if __image:
                images = images.cpu().numpy()
                if self.args.id:
                    self.vs.predict_id(preds, names, self.args.save_dir)
                if self.args.color:
                    self.vs.predict_color(preds, images, names,
                                          self.args.save_dir)
                if self.args.examine:
                    self.vs.predict_examine(preds, targets, images, names,
                                            self.args.save_dir)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        print("Validation:")
        # print(
        #     "[Epoch: %d, numImages: %5d]"
        #     % (epoch, i * self.args.batch_size + image.data.shape[0])
        # )
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print("Loss: %.3f" % test_loss)
class Trainer(object):
    """Trainer for the 'jingwei' tiled remote-sensing dataset: pre-tiles the
    huge source images into .npy arrays, then runs a standard train/validate
    loop with TensorBoard logging and best-mIoU checkpointing.
    """

    def __init__(self, args):
        """Prepare tiles, saver, summary writer, loaders, model, optimizer,
        criterion, scheduler, and optionally resume from a checkpoint.
        """
        self.args = args
        # Generate .npy file for dataloader
        self.img_process(args)
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()
        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)
        # Define network (looked up by name in the `modeling` module).
        model = getattr(modeling, args.model_name)(pretrained=args.pretrained)
        # Define Optimizer
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
        # train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
        #                 {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        # Define Criterion
        self.criterion = SegmentationLosses(
            weight=None, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer
        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()
        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            # Restore optimizer state unless fine-tuning from these weights.
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    # Crop the large source images into unit_size tiles, moving by `stride`
    # each step. The resulting train/val splits are stored as .npy arrays in
    # save_dir so later runs can reuse them and reduce memory usage.
    # Please change the hard-coded paths to your own.
    def img_process(self, args):
        """Tile the two jingwei training images + labels into .npy datasets.

        Skips all work when save_dir already exists. Tiles whose alpha channel
        contains no 255 pixel (fully transparent) are discarded.
        """
        unit_size = args.base_size
        stride = unit_size  # int(unit_size/2)
        save_dir = os.path.join(
            '/data/dingyifeng/pytorch-jingwei-master/npy_process',
            str(unit_size))  # npy_process
        if not os.path.exists(save_dir):
            # Lift PIL's decompression-bomb guard: the inputs are ~50k x 47k.
            Image.MAX_IMAGE_PIXELS = 100000000000

            # load train image 1
            img = Image.open(
                '/data/dingyifeng/jingwei/jingwei_round1_train_20190619/image_1.png'
            )
            img = np.asarray(img)  # (50141, 47161, 4)
            anno_map = Image.open(
                '/data/dingyifeng/jingwei/jingwei_round1_train_20190619/image_1_label.png'
            )
            anno_map = np.asarray(anno_map)  # (50141, 47161)
            length, width = img.shape[0], img.shape[1]
            x1, x2, y1, y2 = 0, unit_size, 0, unit_size
            Img1 = []  # list of cropped image tiles
            Label1 = []  # list of matching label tiles
            while (x1 < length):
                # Clamp the window when it would run past the bottom edge.
                if x2 > length:
                    x2, x1 = length, length - unit_size
                while (y1 < width):
                    if y2 > width:
                        y2, y1 = width, width - unit_size
                    im = img[x1:x2, y1:y2, :]
                    # Keep the tile only if its alpha channel shows any valid
                    # (opaque) pixel.
                    if 255 in im[:, :, -1]:
                        Img1.append(im[:, :, 0:3])  # add RGB tile
                        Label1.append(anno_map[x1:x2, y1:y2])  # add label tile
                    if y2 == width:
                        break
                    y1 += stride
                    y2 += stride
                if x2 == length:
                    break
                y1, y2 = 0, unit_size
                x1 += stride
                x2 += stride
            Img1 = np.array(Img1)  # (4123, 448, 448, 3)
            Label1 = np.array(Label1)  # (4123, 448, 448)

            # load train image 2 (same tiling as above)
            img = Image.open(
                '/data/dingyifeng/jingwei/jingwei_round1_train_20190619/image_2.png'
            )
            img = np.asarray(img)  # (50141, 47161, 4)
            anno_map = Image.open(
                '/data/dingyifeng/jingwei/jingwei_round1_train_20190619/image_2_label.png'
            )
            anno_map = np.asarray(anno_map)  # (50141, 47161)
            length, width = img.shape[0], img.shape[1]
            x1, x2, y1, y2 = 0, unit_size, 0, unit_size
            Img2 = []  # list of cropped image tiles
            Label2 = []  # list of matching label tiles
            while (x1 < length):
                if x2 > length:
                    x2, x1 = length, length - unit_size
                while (y1 < width):
                    if y2 > width:
                        y2, y1 = width, width - unit_size
                    im = img[x1:x2, y1:y2, :]
                    if 255 in im[:, :, -1]:
                        Img2.append(im[:, :, 0:3])  # add RGB tile
                        Label2.append(anno_map[x1:x2, y1:y2])  # add label tile
                    if y2 == width:
                        break
                    y1 += stride
                    y2 += stride
                if x2 == length:
                    break
                y1, y2 = 0, unit_size
                x1 += stride
                x2 += stride
            Img2 = np.array(Img2)  # (5072, 448, 448, 3)
            Label2 = np.array(Label2)  # (5072, 448, 448)

            Img = np.concatenate((Img1, Img2), axis=0)
            cat = np.concatenate((Label1, Label2), axis=0)

            # shuffle (fixed seed so the split is reproducible)
            np.random.seed(1)
            assert (Img.shape[0] == cat.shape[0])
            shuffle_id = np.arange(Img.shape[0])
            np.random.shuffle(shuffle_id)
            Img = Img[shuffle_id]
            cat = cat[shuffle_id]
            os.mkdir(save_dir)
            print("=> generate {}".format(unit_size))

            # split train dataset
            # NOTE(review): the slice is commented out, so training currently
            # uses ALL tiles while validation reuses the last 20% — the two
            # sets overlap. Confirm this is intentional.
            images_train = Img  #[:int(Img.shape[0]*0.8)]
            categories_train = cat  #[:int(cat.shape[0]*0.8)]
            assert (len(images_train) == len(categories_train))
            np.save(os.path.join(save_dir, 'train_img.npy'), images_train)
            np.save(os.path.join(save_dir, 'train_label.npy'),
                    categories_train)

            # split val dataset
            images_val = Img[int(Img.shape[0] * 0.8):]
            categories_val = cat[int(cat.shape[0] * 0.8):]
            assert (len(images_val) == len(categories_val))
            np.save(os.path.join(save_dir, 'val_img.npy'), images_val)
            np.save(os.path.join(save_dir, 'val_label.npy'), categories_val)
            print("=> img_process finished!")
        else:
            print("{} file already exists!".format(unit_size))

        # NOTE(review): deleting from locals() has no effect on actual local
        # variables in CPython, and mutating the dict while iterating its keys
        # can raise RuntimeError — this "free memory" loop is a no-op at best.
        for x in locals().keys():
            del locals()[x]  # free memory (intended)
        import gc
        gc.collect()

    def training(self, epoch):
        """Train for one epoch; logs per-iteration loss and periodic image
        grids to TensorBoard, and saves a checkpoint when args.no_val is set.
        """
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            # Poly/step LR update is driven per-iteration by the scheduler.
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            # NOTE(review): `num_img_tr // 10` is 0 when the loader has fewer
            # than 10 batches -> ZeroDivisionError.
            if i % (num_img_tr // 10) == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output,
                                             global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)

    def validation(self, epoch):
        """Evaluate one epoch on the validation set; logs metrics and saves a
        checkpoint whenever mIoU improves on the best so far.
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
class Trainer(object):
    """DeepLab trainer with an optional DenseCRF regularized loss (rloss):
    total loss = CE + weight * DenseCRF term, plus extensive gradient/entropy
    visualization via TensorBoard.
    """

    def __init__(self, args):
        """Build saver, summary writer, loaders, DeepLab model with 1x/10x lr
        parameter groups, losses (optionally class-balanced and DenseCRF),
        scheduler, and optionally resume a checkpoint.
        """
        self.args = args
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        # Backbone trains at base lr, decoder/ASPP at 10x lr.
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(),
                         'lr': args.lr * 10}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

        # Define Criterion
        # whether to use class balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset),
                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset,
                                                  self.train_loader,
                                                  self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # DenseCRF loss layer only exists when its weight is positive.
        if args.densecrfloss > 0:
            self.densecrflosslayer = DenseCRFLoss(
                weight=args.densecrfloss,
                sigma_rgb=args.sigma_rgb,
                sigma_xy=args.sigma_xy,
                scale_factor=args.rloss_scale)
            print(self.densecrflosslayer)

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))

        # Using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'"
                                   .format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Train one epoch with CE (+ optional DenseCRF) loss.

        When the CRF loss is active this also runs a detached 'copy' backward
        pass purely to visualize the rloss gradient, and registers hooks on
        the live tensors to visualize CE gradients.
        """
        train_loss = 0.0
        train_celoss = 0.0
        train_crfloss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        softmax = nn.Softmax(dim=1)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            # croppings masks out padded pixels for the CRF term.
            croppings = (target != 254).float()
            target[target == 254] = 255
            # Pixels labeled 255 are those unlabeled pixels. Padded region are labeled 254.
            # see function RandomScaleCrop in dataloaders/custom_transforms.py
            # for the detail in data preprocessing
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            celoss = self.criterion(output, target)

            if self.args.densecrfloss == 0:
                loss = celoss
            else:
                # Logit statistics logged for debugging.
                max_output = (max(torch.abs(torch.max(output)),
                                  torch.abs(torch.min(output))))
                mean_output = torch.mean(torch.abs(output)).item()
                # std_output = torch.std(output).item()
                probs = softmax(output)  # /max_output*4
                # CRF loss works on the un-normalized RGB image.
                denormalized_image = denormalizeimage(
                    sample['image'],
                    mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225))
                densecrfloss = self.densecrflosslayer(denormalized_image,
                                                      probs, croppings)
                if self.args.cuda:
                    densecrfloss = densecrfloss.cuda()
                loss = celoss + densecrfloss
                train_crfloss += densecrfloss.item()

                # Detached replica of the logits so the rloss gradient can be
                # visualized without touching the real autograd graph.
                logits_copy = output.detach().clone().requires_grad_(True)
                max_output_copy = (max(torch.abs(torch.max(logits_copy)),
                                       torch.abs(torch.min(logits_copy))))
                probs_copy = softmax(logits_copy)  # /max_output_copy*4
                denormalized_image_copy = denormalized_image.detach().clone()
                croppings_copy = croppings.detach().clone()
                # NOTE(review): croppings_copy is computed but `croppings` is
                # passed here — harmless (same values) but likely a slip.
                densecrfloss_copy = self.densecrflosslayer(
                    denormalized_image_copy, probs_copy, croppings)

                # Hook: dump a colorized |grad| image grid every num_img_tr//10
                # iterations. Closes over loop vars i/epoch; safe because the
                # hooks fire during this iteration's backward() calls.
                @torch.no_grad()
                def add_grad_map(grad, plot_name):
                    if i % (num_img_tr // 10) == 0:
                        global_step = i + num_img_tr * epoch
                        batch_grads = torch.max(torch.abs(grad),
                                                dim=1)[0].detach().cpu().numpy()
                        color_imgs = []
                        for grad_img in batch_grads:
                            # Pin one pixel to 0 so colorize has a stable range.
                            grad_img[0, 0] = 0
                            img = colorize(grad_img)[:, :, :3]
                            color_imgs.append(img)
                        color_imgs = torch.from_numpy(
                            np.array(color_imgs).transpose([0, 3, 1, 2]))
                        grid_image = make_grid(color_imgs[:3], 3,
                                               normalize=False,
                                               range=(0, 255))
                        self.writer.add_image(plot_name, grid_image,
                                              global_step)

                output.register_hook(
                    lambda grad: add_grad_map(grad, 'Grad Logits'))
                probs.register_hook(
                    lambda grad: add_grad_map(grad, 'Grad Probs'))
                logits_copy.register_hook(
                    lambda grad: add_grad_map(grad, 'Grad Logits Rloss'))
                # Backward on the detached replica only feeds the hook above.
                densecrfloss_copy.backward()

                if i % (num_img_tr // 10) == 0:
                    global_step = i + num_img_tr * epoch
                    # Per-pixel prediction entropy map.
                    img_entropy = torch.sum(-probs * torch.log(probs + 1e-9),
                                            dim=1).detach().cpu().numpy()
                    color_imgs = []
                    for e in img_entropy:
                        e[0, 0] = 0
                        img = colorize(e)[:, :, :3]
                        color_imgs.append(img)
                    color_imgs = torch.from_numpy(
                        np.array(color_imgs).transpose([0, 3, 1, 2]))
                    grid_image = make_grid(color_imgs[:3], 3, normalize=False,
                                           range=(0, 255))
                    self.writer.add_image('Entropy', grid_image, global_step)
                    self.writer.add_histogram(
                        'train/total_loss_iter/logit_histogram', output,
                        i + num_img_tr * epoch)
                    self.writer.add_histogram(
                        'train/total_loss_iter/probs_histogram', probs,
                        i + num_img_tr * epoch)

                self.writer.add_scalar('train/total_loss_iter/rloss',
                                       densecrfloss.item(),
                                       i + num_img_tr * epoch)
                self.writer.add_scalar('train/total_loss_iter/max_output',
                                       max_output.item(),
                                       i + num_img_tr * epoch)
                self.writer.add_scalar('train/total_loss_iter/mean_output',
                                       mean_output, i + num_img_tr * epoch)

            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            train_celoss += celoss.item()
            tbar.set_description(
                'Train loss: %.3f = CE loss %.3f + CRF loss: %.3f' %
                (train_loss / (i + 1), train_celoss / (i + 1),
                 train_crfloss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   i + num_img_tr * epoch)
            self.writer.add_scalar('train/total_loss_iter/ce', celoss.item(),
                                   i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % (num_img_tr // 10) == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output,
                                             global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        #if self.args.no_val:
        if self.args.save_interval:
            # save checkpoint every interval epoch
            is_best = False
            if (epoch + 1) % self.args.save_interval == 0:
                self.saver.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': self.model.module.state_dict(),
                        'optimizer': self.optimizer.state_dict(),
                        'best_pred': self.best_pred,
                    },
                    is_best,
                    filename='checkpoint_epoch_{}.pth.tar'.format(
                        str(epoch + 1)))

    def validation(self, epoch):
        """Evaluate one epoch (CE loss + pixel metrics) and checkpoint on a
        new best mIoU. Padded pixels (254) are remapped to the ignore index
        255 to match training.
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            target[target == 254] = 255
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
class Trainer(object):
    """Trainer used in an active-learning loop: dataloaders are injected, and
    saver/summary are (re)initialized per selection round via
    `setup_saver_and_summary`. Supports deeplab/enet/fastscnn backbones.
    """

    def __init__(self, args, dataloaders, mc_dropout):
        """Store config and injected loaders.

        Args:
            args: parsed command-line namespace.
            dataloaders: (train_loader, val_loader, test_loader, nclass) tuple.
            mc_dropout: whether MC-dropout is used downstream (stored only;
                not referenced in this class body — presumably used by callers
                or subclasses; verify).
        """
        self.args = args
        self.mc_dropout = mc_dropout
        self.train_loader, self.val_loader, self.test_loader, self.nclass = dataloaders

    def setup_saver_and_summary(self,
                                num_current_labeled_samples,
                                samples,
                                experiment_group=None,
                                regions=None):
        """Create a fresh ActiveSaver + TensorBoard writer for the current
        active-learning round and persist the selected samples/regions.
        """
        self.saver = ActiveSaver(self.args,
                                 num_current_labeled_samples,
                                 experiment_group=experiment_group)
        self.saver.save_experiment_config()
        self.saver.save_active_selections(samples, regions)
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

    def initialize(self):
        """(Re)build model, optimizer, criterion, evaluator, and scheduler —
        called at the start of each active-learning round so training restarts
        from scratch on the enlarged labeled set.
        """
        args = self.args
        if args.architecture == 'deeplab':
            print('Using Deeplab')
            model = DeepLab(num_classes=self.nclass,
                            backbone=args.backbone,
                            output_stride=args.out_stride,
                            sync_bn=args.sync_bn,
                            freeze_bn=args.freeze_bn)
            # Backbone at base lr, head at 10x lr.
            train_params = [{
                'params': model.get_1x_lr_params(),
                'lr': args.lr
            }, {
                'params': model.get_10x_lr_params(),
                'lr': args.lr * 10
            }]
        elif args.architecture == 'enet':
            print('Using ENet')
            model = ENet(num_classes=self.nclass,
                         encoder_relu=True,
                         decoder_relu=True)
            train_params = [{'params': model.parameters(), 'lr': args.lr}]
        elif args.architecture == 'fastscnn':
            print('Using FastSCNN')
            model = FastSCNN(3, self.nclass)
            train_params = [{'params': model.parameters(), 'lr': args.lr}]

        if args.optimizer == 'SGD':
            optimizer = torch.optim.SGD(train_params,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay,
                                        nesterov=args.nesterov)
        elif args.optimizer == 'Adam':
            optimizer = torch.optim.Adam(train_params,
                                         weight_decay=args.weight_decay)
        else:
            raise NotImplementedError

        if args.use_balanced_weights:
            weight = calculate_weights_labels(args.dataset, self.train_loader,
                                              self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None

        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer
        self.evaluator = Evaluator(self.nclass)

        if args.use_lr_scheduler:
            self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr,
                                          args.epochs, len(self.train_loader))
        else:
            self.scheduler = None

        if args.cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=self.args.gpu_ids)
            patch_replication_callback(self.model)
            self.model = self.model.cuda()

        self.best_pred = 0.0

    def training(self, epoch):
        """Train one epoch.

        Returns:
            float: accumulated training loss over the epoch.
        """
        train_loss = 0.0
        self.model.train()
        num_img_tr = len(self.train_loader)
        tbar = tqdm(self.train_loader, desc='\r')
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            if self.scheduler:
                self.scheduler(self.optimizer, i, epoch, self.best_pred)
                self.writer.add_scalar('train/learning_rate',
                                       self.scheduler.current_lr,
                                       i + num_img_tr * epoch)
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   i + num_img_tr * epoch)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)
        print('BestPred: %.3f' % self.best_pred)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
        return train_loss

    def validation(self, epoch):
        """Evaluate one epoch; checkpoints on every call (flagging new bests)
        and keeps one random batch aside for visualization.

        Returns:
            (test_loss, mIoU, Acc, Acc_class, FWIoU,
             [vis_img, vis_tgt, vis_out]) — the last element is a randomly
            chosen batch (inputs, targets, raw outputs) for plotting.
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        # Pick one random batch whose tensors are kept for visualization.
        visualization_index = int(random.random() * len(self.val_loader))
        vis_img = None
        vis_tgt = None
        vis_out = None
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            if i == visualization_index:
                vis_img = image
                vis_tgt = target
                vis_out = output
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        is_best = False
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
        # save every validation model (overwrites)
        self.saver.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': self.model.module.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)
        return test_loss, mIoU, Acc, Acc_class, FWIoU, [
            vis_img, vis_tgt, vis_out
        ]
class trainNew(object):
    """Trainer for a searched (Auto-DeepLab style) architecture: loads a cell
    genotype + network path from .npy files, builds `newModel` plus a separate
    `Decoder`, and runs standard train/validate loops with TensorBoard logging
    and best-mIoU checkpointing.
    """

    def __init__(self, args):
        """Build saver, summary writer, loaders, the searched network,
        optimizer, criterion, evaluator, and scheduler; optionally resume.

        Args:
            args: parsed command-line namespace (saved_arch_path, lr, cuda,
                  resume/ft/clean_module flags, etc.).
        """
        self.args = args

        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Define Dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True}
        self.train_loader, self.val_loader, self.test_loader, self.nclass = make_data_loader(
            args, **kwargs)

        # Load the searched cell genotype and network path.
        cell_path = os.path.join(args.saved_arch_path, 'genotype.npy')
        network_path_space = os.path.join(args.saved_arch_path,
                                          'network_path_space.npy')
        new_cell_arch = np.load(cell_path)
        new_network_arch = np.load(network_path_space)

        # Define network
        model = newModel(network_arch=new_network_arch,
                         cell_arch=new_cell_arch,
                         num_classes=self.nclass,
                         num_layers=12)
        #                  output_stride=args.out_stride,
        #                  sync_bn=args.sync_bn,
        #                  freeze_bn=args.freeze_bn)
        # NOTE(review): decoder parameters are not in the optimizer and the
        # decoder is never moved to CUDA or switched train/eval — confirm it
        # is parameter-free or handled elsewhere.
        self.decoder = Decoder(self.nclass, 'autodeeplab', args, False)

        # FIX: `train_params` was previously only defined in commented-out
        # code, so the SGD constructor below raised NameError. Use a single
        # parameter group over all model parameters (the variant suggested in
        # the original comments).
        # TODO: look into separate 1x/10x lr groups as done for deeplab.
        train_params = [{'params': model.parameters(), 'lr': args.lr}]

        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

        # Define Criterion
        # whether to use class balanced weights
        if args.use_balanced_weights:
            classes_weights_path = os.path.join(
                Path.db_root_dir(args.dataset),
                args.dataset + '_classes_weights.npy')
            if os.path.isfile(classes_weights_path):
                weight = np.load(classes_weights_path)
            else:
                weight = calculate_weigths_labels(args.dataset,
                                                  self.train_loader,
                                                  self.nclass)
            weight = torch.from_numpy(weight.astype(np.float32))
        else:
            weight = None
        self.criterion = SegmentationLosses(
            weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(
            args.lr_scheduler, args.lr, args.epochs,
            len(self.train_loader))  #TODO: use min_lr ?

        # TODO: Figure out if len(self.train_loader) should be devided by two
        # ? in other module as well

        # Using cuda
        if args.cuda:
            if (torch.cuda.device_count() > 1 or args.load_parallel):
                self.model = torch.nn.DataParallel(self.model.cuda())
                patch_replication_callback(self.model)
            self.model = self.model.cuda()
            print('cuda finished')

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']

            # if the weights are wrapped in module object we have to clean it
            if args.clean_module:
                # FIX: the original loaded the raw (module.-prefixed) state
                # dict into the model *before* stripping the prefix, which
                # fails with mismatched keys and defeats the cleaning step.
                # Strip the prefix first, then load once.
                state_dict = checkpoint['state_dict']
                new_state_dict = OrderedDict()
                for k, v in state_dict.items():
                    name = k[7:]  # remove 'module.' of dataparallel
                    new_state_dict[name] = v
                self.model.load_state_dict(new_state_dict)
            else:
                if (torch.cuda.device_count() > 1 or args.load_parallel):
                    self.model.module.load_state_dict(checkpoint['state_dict'])
                else:
                    self.model.load_state_dict(checkpoint['state_dict'])

            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Train one epoch: encoder (searched model) -> decoder -> loss.

        Logs per-iteration loss and periodic image grids to TensorBoard and
        saves a checkpoint when args.no_val is set.
        """
        train_loss = 0.0
        self.model.train()
        tbar = tqdm(self.train_loader)
        num_img_tr = len(self.train_loader)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, i, epoch, self.best_pred)
            self.optimizer.zero_grad()
            encoder_output, low_level_feature = self.model(image)
            output = self.decoder(encoder_output, low_level_feature)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   i + num_img_tr * epoch)

            # Show 10 * 3 inference results each epoch
            if i % (num_img_tr // 10) == 0:
                global_step = i + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output,
                                             global_step)

        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            is_best = False
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)

    def validation(self, epoch):
        """Evaluate one epoch on the validation set; logs metrics to
        TensorBoard and checkpoints when mIoU improves on the best so far.
        """
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(self.val_loader, desc='\r')
        test_loss = 0.0
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                encoder_output, low_level_feature = self.model(image)
                output = self.decoder(encoder_output, low_level_feature)
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', test_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' %
              (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, fwIoU: {}".format(
            Acc, Acc_class, mIoU, FWIoU))
        print('Loss: %.3f' % test_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                }, is_best)
class Trainer(object):
    def __init__(self,
                 batch_size=32,
                 optimizer_name="Adam",
                 lr=1e-3,
                 weight_decay=1e-5,
                 epochs=200,
                 model_name="model01",
                 gpu_ids=None,
                 resume=None,
                 tqdm=None,
                 is_develop=False):
        """
        Generic training harness: wires up saver/tensorboard, data loaders,
        model, optimizer and loss, and optionally resumes from a checkpoint.

        args:
            batch_size = (int) batch_size of training and validation
            optimizer_name = (string) name of the optimizer (e.g. "Adam")
            lr = (float) learning rate of optimization
            weight_decay = (float) weight decay of optimization
            epochs = (int) The number of epochs of training
            model_name = (string) The name of training model. Will be folder name.
            gpu_ids = (List) List of gpu_ids. (e.g. gpu_ids = [0, 1]). Use CPU, if it is None.
            resume = (Dict) Dict of some settings.
                     (resume = {"checkpoint_path": PATH_of_checkpoint, "fine_tuning": True or False}).
                     Learn from scratch, if it is None.
            tqdm = (tqdm Object) progress bar object. Set your tqdm please.
                   Don't view progress bar, if it is None.
            is_develop = (bool) forwarded to make_data_loader (development mode).
        """
        # Set params
        self.batch_size = batch_size
        self.epochs = epochs
        self.start_epoch = 0
        # BUGFIX: the original referenced `torch.cuda.is_available` without
        # calling it; the (always truthy) function object made use_cuda True on
        # CUDA-less machines whenever gpu_ids was passed, crashing later .cuda()
        # calls. It must be invoked.
        self.use_cuda = (gpu_ids is not None) and torch.cuda.is_available()
        self.tqdm = tqdm
        self.use_tqdm = tqdm is not None
        # Define Utils. (No need to Change.)
        """
        These are Project Modules.
        You may not have to change these.

        Saver: Save model weight. / <utils.saver.Saver()>
        TensorboardSummary: Write tensorboard file. / <utils.summaries.TensorboardSummary()>
        Evaluator: Calculate some metrics (e.g. Accuracy). / <utils.metrics.Evaluator()>
        """
        ## ***Define Saver***
        self.saver = Saver(model_name, lr, epochs)
        self.saver.save_experiment_config()
        ## ***Define Tensorboard Summary***
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # ------------------------- #
        # Define Training components. (You have to Change!)
        """
        These are important setting for training.
        You have to change these.

        make_data_loader: This creates some <Dataloader>s. / <dataloader.__init__>
        Modeling: You have to define your Model. / <modeling.modeling.Modeling()>
        Evaluator: You have to define Evaluator. / <utils.metrics.Evaluator()>
        Optimizer: You have to define Optimizer. / <utils.optimizer.Optimizer()>
        Loss: You have to define Loss function. / <utils.loss.Loss()>
        """
        ## ***Define Dataloader***
        self.train_loader, self.val_loader, self.test_loader, self.num_classes = make_data_loader(
            batch_size, is_develop=is_develop)
        ## ***Define Your Model***
        self.model = Modeling(self.num_classes)
        ## ***Define Evaluator***
        self.evaluator = Evaluator(self.num_classes)
        ## ***Define Optimizer***
        self.optimizer = Optimizer(self.model.parameters(),
                                   optimizer_name=optimizer_name,
                                   lr=lr,
                                   weight_decay=weight_decay)
        ## ***Define Loss***
        # BUGFIX: only move the class-weight tensor to the GPU when CUDA is
        # actually in use; the unconditional .cuda() crashed CPU-only runs at
        # construction time.
        class_weight = torch.tensor([1.0, 1594.0])
        if self.use_cuda:
            class_weight = class_weight.cuda()
        self.criterion = SegmentationLosses(weight=class_weight).build_loss('ce')
        # self.criterion = SegmentationLosses().build_loss('focal')
        # self.criterion = BCEDiceLoss()

        # ------------------------- #
        # Some settings
        """
        You don't have to touch below code.

        Using cuda: Enable to use cuda if you want.
        Resuming checkpoint: You can resume training if you want.
        """
        ## ***Using cuda***
        if self.use_cuda:
            self.model = torch.nn.DataParallel(self.model, device_ids=gpu_ids).cuda()
        ## ***Resuming checkpoint***
        """You can ignore below code."""
        self.best_pred = 0.0
        if resume is not None:
            if not os.path.isfile(resume["checkpoint_path"]):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    resume["checkpoint_path"]))
            checkpoint = torch.load(resume["checkpoint_path"])
            self.start_epoch = checkpoint['epoch']
            if self.use_cuda:
                # model is wrapped in DataParallel, so weights live on .module
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if resume["fine_tuning"]:
                # resume params of optimizer, if run fine tuning.
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                self.start_epoch = 0
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume["checkpoint_path"], checkpoint['epoch']))

    def _run_epoch(self, epoch, mode="train", leave_progress=True, use_optuna=False):
        """
        run training or validation 1 epoch.
        You don't have to change almost of this method.

        args:
            epoch = (int) How many epochs this time.
            mode = {"train" or "val"}
            leave_progress = {True or False} Can choose whether leave progress bar or not.
            use_optuna = {True or False} Can choose whether use optuna or not.

        Change point (if you need):
        - Evaluation: You can change metrics of monitoring.
        - writer.add_scalar: You can change metrics to be saved in tensorboard.
        """
        # ------------------------- #
        leave_progress = leave_progress and not use_optuna
        # Initializing
        epoch_loss = 0.0
        ## Set model mode & tqdm (progress bar; it wrap dataloader)
        assert (mode == "train") or (
            mode == "val"
        ), "argument 'mode' can be 'train' or 'val.' Not {}.".format(mode)
        if mode == "train":
            data_loader = self.tqdm(
                self.train_loader,
                leave=leave_progress) if self.use_tqdm else self.train_loader
            self.model.train()
            num_dataset = len(self.train_loader)
        elif mode == "val":
            data_loader = self.tqdm(
                self.val_loader,
                leave=leave_progress) if self.use_tqdm else self.val_loader
            self.model.eval()
            num_dataset = len(self.val_loader)
        ## Reset confusion matrix of evaluator
        self.evaluator.reset()

        # ------------------------- #
        # Run 1 epoch
        for i, sample in enumerate(data_loader):
            ## ***Get Input data***
            inputs, target = sample["input"], sample["label"]
            if self.use_cuda:
                inputs, target = inputs.cuda(), target.cuda()
            ## ***Calculate Loss <Train>***
            if mode == "train":
                self.optimizer.zero_grad()
                output = self.model(inputs)
                loss = self.criterion(output, target)
                loss.backward()
                self.optimizer.step()
            ## ***Calculate Loss <Validation>***
            elif mode == "val":
                with torch.no_grad():
                    output = self.model(inputs)
                    loss = self.criterion(output, target)
            epoch_loss += loss.item()
            ## ***Report results***
            if self.use_tqdm:
                data_loader.set_description('{} loss: {:.3f}'.format(
                    mode, epoch_loss / (i + 1)))
            ## ***Add batch results into evaluator***
            target = target.cpu().numpy()
            output = torch.argmax(output, axis=1).data.cpu().numpy()
            self.evaluator.add_batch(target, output)
        ## **********Evaluate Score**********
        """You can add new metrics! <utils.metrics.Evaluator()>"""
        # Acc = self.evaluator.Accuracy()
        miou = self.evaluator.Mean_Intersection_over_Union()
        if not use_optuna:
            ## ***Save eval into Tensorboard***
            self.writer.add_scalar('{}/loss_epoch'.format(mode),
                                   epoch_loss / (i + 1), epoch)
            # self.writer.add_scalar('{}/Acc'.format(mode), Acc, epoch)
            self.writer.add_scalar('{}/miou'.format(mode), miou, epoch)
        print('Total {} loss: {:.3f}'.format(mode, epoch_loss / num_dataset))
        print("{0} mIoU:{1:.2f}".format(mode, miou))
        # Return score to watch. (update checkpoint or optuna's objective)
        return miou

    def run(self, leave_progress=True, use_optuna=False):
        """
        Run all epochs of training and validation.

        Returns the best validation mIoU seen over the run.
        """
        for epoch in tqdm(range(self.start_epoch, self.epochs)):
            print(pycolor.GREEN + "[Epoch: {}]".format(epoch) + pycolor.END)

            ## ***Train***
            print(pycolor.YELLOW + "Training:" + pycolor.END)
            self._run_epoch(epoch,
                            mode="train",
                            leave_progress=leave_progress,
                            use_optuna=use_optuna)
            ## ***Validation***
            print(pycolor.YELLOW + "Validation:" + pycolor.END)
            score = self._run_epoch(epoch,
                                    mode="val",
                                    leave_progress=leave_progress,
                                    use_optuna=use_optuna)
            print("---------------------")
            if score > self.best_pred:
                print("model improve best score from {:.4f} to {:.4f}.".format(
                    self.best_pred, score))
                self.best_pred = score
                self.saver.save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                })
        self.writer.close()
        return self.best_pred
class Trainer(object):
    """Train/validate a DeepLab model fed by generator-style data loaders."""

    def __init__(self, args):
        self.args = args
        self.train_dir = './data_list/train_lite.csv'
        self.train_list = pd.read_csv(self.train_dir)
        self.val_dir = './data_list/val_lite.csv'
        self.val_list = pd.read_csv(self.val_dir)
        self.train_length = len(self.train_list)
        self.val_length = len(self.val_list)
        # Define Saver
        self.saver = Saver(args)
        self.saver.save_experiment_config()
        # Define Tensorboard Summary
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # approach 2: generator-based loaders
        self.train_gen, self.val_gen, self.test_gen, self.nclass = make_data_loader2(args)

        # Define network
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]
        # Define Optimizer
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
        # optimizer = torch.optim.Adam(train_params, weight_decay=args.weight_decay)

        # Define Criterion
        # self.criterion = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode=args.loss_type)
        self.criterion1 = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode='ce')
        self.criterion2 = SegmentationLosses(weight=None, cuda=args.cuda).build_loss(mode='dice')
        self.model, self.optimizer = model, optimizer

        # Define Evaluator
        self.evaluator = Evaluator(self.nclass)
        # Define lr scheduler
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      self.train_length)

        # Using cuda (NOTE: the model is only moved to the GPU, it is NOT
        # wrapped in DataParallel — checkpoints therefore use self.model
        # directly, never self.model.module)
        if args.cuda:
            self.model = self.model.cuda()

        # Resuming checkpoint
        self.best_pred = 0.0
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                # self.model.module.load_state_dict(checkpoint['state_dict'])
                self.model.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))

        # Clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0

    def training(self, epoch):
        """Run one training epoch over the train generator and log metrics."""
        train_loss = 0.0
        prev_time = time.time()
        self.model.train()
        self.evaluator.reset()
        num_img_tr = self.train_length / self.args.batch_size
        # ROBUSTNESS: guard against num_img_tr < 10, where the original
        # `iteration % (num_img_tr // 10)` divided by zero.
        vis_interval = max(num_img_tr // 10, 1)
        for iteration in range(int(num_img_tr)):
            samples = next(self.train_gen)
            image, target = samples['image'], samples['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            self.scheduler(self.optimizer, iteration, epoch, self.best_pred)
            self.optimizer.zero_grad()
            output = self.model(image)
            # combined cross-entropy + dice loss
            loss1 = self.criterion1(output, target)
            loss2 = self.criterion2(output, make_one_hot(target.long(), num_classes=self.nclass))
            loss = loss1 + loss2
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            self.writer.add_scalar('train/total_loss_iter', loss.item(),
                                   iteration + num_img_tr * epoch)

            # print log (log every 4 iterations by default)
            if iteration % 4 == 0:
                end_time = time.time()
                print("Iter - %d: train loss: %.3f, celoss: %.4f, diceloss: %.4f, time cost: %.3f s" \
                      % (iteration, loss.item(), loss1.item(), loss2.item(), end_time - prev_time))
                prev_time = time.time()

            # Show 10 * 3 inference results each epoch
            if iteration % vis_interval == 0:
                global_step = iteration + num_img_tr * epoch
                self.summary.visualize_image(self.writer, self.args.dataset,
                                             image, target, output, global_step)

            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)
            print("input image shape/iter:", image.shape)

        # train evaluate
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        IoU = self.evaluator.Mean_Intersection_over_Union()
        mIoU = np.nanmean(IoU)
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        print("Acc_tr:{}, Acc_class_tr:{}, IoU_tr:{}, mIoU_tr:{}, fwIoU_tr: {}".format(Acc, Acc_class, IoU, mIoU, FWIoU))
        self.writer.add_scalar('train/total_loss_epoch', train_loss, epoch)
        print('[Epoch: %d, numImages: %5d]' % (epoch, iteration * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)

        if self.args.no_val:
            # save checkpoint every epoch
            # BUGFIX: the model is never DataParallel-wrapped in this trainer,
            # so self.model.module does not exist; save self.model directly,
            # matching validation()'s checkpoint.
            is_best = False
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)

    def validation(self, epoch):
        """Run one validation epoch; checkpoint when mIoU improves."""
        self.model.eval()
        self.evaluator.reset()
        val_loss = 0.0
        prev_time = time.time()
        num_img_val = self.val_length / self.args.batch_size
        print("Validation:", "epoch ", epoch)
        print(num_img_val)
        for iteration in range(int(num_img_val)):
            samples = next(self.val_gen)
            image, target = samples['image'], samples['label']
            if self.args.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            loss1 = self.criterion1(output, target)
            loss2 = self.criterion2(output, make_one_hot(target.long(), num_classes=self.nclass))
            loss = loss1 + loss2
            # BUGFIX: the original accumulated `val_loss += loss.item()` twice
            # per iteration (before and after the add_scalar call), doubling
            # the reported validation loss; accumulate exactly once.
            val_loss += loss.item()
            self.writer.add_scalar('val/total_loss_iter', loss.item(),
                                   iteration + num_img_val * epoch)

            # print log (log every 4 iterations by default)
            if iteration % 4 == 0:
                end_time = time.time()
                print("Iter - %d: validation loss: %.3f, celoss: %.4f, diceloss: %.4f, time cost: %.3f s" \
                      % (iteration, loss.item(), loss1.item(), loss2.item(), end_time - prev_time))
                prev_time = time.time()

            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)
            print(image.shape)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        IoU = self.evaluator.Mean_Intersection_over_Union()
        mIoU = np.nanmean(IoU)
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        self.writer.add_scalar('val/total_loss_epoch', val_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        print('Validation:')
        print('[Epoch: %d, numImages: %5d]' % (epoch, iteration * self.args.batch_size + image.data.shape[0]))
        print("Acc_val:{}, Acc_class_val:{}, IoU:val:{}, mIoU_val:{}, fwIoU_val: {}".format(Acc, Acc_class, IoU, mIoU, FWIoU))
        print('Loss: %.3f' % val_loss)

        new_pred = mIoU
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
            self.saver.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'best_pred': self.best_pred,
            }, is_best)
class Test:
    """Evaluate a trained DeepLab checkpoint on a source domain and, after a
    BN-statistics adaptation pass, on every other (target) domain."""

    def __init__(self, model_path, config, cuda=False):
        # BUGFIX: keep the config on the instance — test() previously read a
        # module-level global `config`, which raises NameError when this class
        # is used from another module.
        self.config = config
        self.target = config.all_dataset
        self.target.remove(config.dataset)
        # load source domain
        self.source_set = spacenet.Spacenet(city=config.dataset, split='test',
                                            img_root=config.img_root)
        self.source_loader = DataLoader(self.source_set, batch_size=16,
                                        shuffle=False, num_workers=2)

        self.target_set = []
        self.target_loader = []
        self.target_trainset = []
        self.target_trainloader = []
        # load other domains (a test loader for evaluation and a train loader
        # for the BN adaptation forward passes)
        for city in self.target:
            test = spacenet.Spacenet(city=city, split='test', img_root=config.img_root)
            train = spacenet.Spacenet(city=city, split='train', img_root=config.img_root)
            self.target_set.append(test)
            self.target_trainset.append(train)
            self.target_loader.append(
                DataLoader(test, batch_size=16, shuffle=False, num_workers=2))
            self.target_trainloader.append(
                DataLoader(train, batch_size=16, shuffle=False, num_workers=2))

        self.model = DeepLab(num_classes=2,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=config.freeze_bn)
        if cuda:
            self.checkpoint = torch.load(model_path)
        else:
            self.checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(self.checkpoint)
        self.evaluator = Evaluator(2)
        self.cuda = cuda
        if cuda:
            self.model = self.model.cuda()

    def get_performance(self, dataloader, trainloader, if_source):
        """Return (building accuracy, building IoU, mIoU) on `dataloader`.

        For target domains (if_source=False) the model is first run in train
        mode over `trainloader` so BatchNorm running statistics adapt to the
        target domain; weights are untouched (torch.no_grad)."""
        # change mean and var of bn to adapt to the target domain
        if not if_source:
            self.model.train()
            for sample in trainloader:
                image, target = sample['image'], sample['label']
                if self.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    output = self.model(image)

        # evaluate the model
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(dataloader, desc='\r')
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

        # Fast test during the training
        Acc = self.evaluator.Building_Acc()
        IoU = self.evaluator.Building_IoU()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        return Acc, IoU, mIoU

    def test(self):
        """Evaluate source + all target domains, print and dump results."""
        A, I, Im = self.get_performance(self.source_loader, None, True)
        tA, tI, tIm = [], [], []
        for dl, tl in zip(self.target_loader, self.target_trainloader):
            tA_, tI_, tIm_ = self.get_performance(dl, tl, False)
            tA.append(tA_)
            tI.append(tI_)
            tIm.append(tIm_)

        res = {}
        print("Test for source domain:")
        print("{}: Acc:{}, IoU:{}, mIoU:{}".format(self.config.dataset, A, I, Im))
        res[self.config.dataset] = {'Acc': A, 'IoU': I, 'mIoU': Im}
        print('Test for target domain:')
        for i, city in enumerate(self.target):
            print("{}: Acc:{}, IoU:{}, mIoU:{}".format(city, tA[i], tI[i], tIm[i]))
            res[city] = {'Acc': tA[i], 'IoU': tI[i], 'mIoU': tIm[i]}
        with open('train_log/test_bn.json', 'w') as f:
            json.dump(res, f)
class Test:
    """Evaluate a DeepLab checkpoint across domains, optionally adapting BN
    statistics per target city, dumping BN params and example predictions."""

    def __init__(self, model_path, config, bn, save_path, save_batch, cuda=False):
        self.bn = bn
        self.target = config.all_dataset
        self.target.remove(config.dataset)
        # load source domain
        self.source_set = spacenet.Spacenet(city=config.dataset, split='test',
                                            img_root=config.img_root)
        self.source_loader = DataLoader(self.source_set, batch_size=16,
                                        shuffle=False, num_workers=2)
        self.save_path = save_path
        self.save_batch = save_batch

        self.target_set = []
        self.target_loader = []
        self.target_trainset = []
        self.target_trainloader = []
        self.config = config

        # load other domains
        # BUGFIX: the dataset constructions had been commented out, leaving
        # `test`/`train` undefined (NameError on first use); rebuild them from
        # config.img_root, as the sibling tester class does.
        for city in self.target:
            test = spacenet.Spacenet(city=city, split='test', img_root=config.img_root)
            train = spacenet.Spacenet(city=city, split='train', img_root=config.img_root)
            self.target_set.append(test)
            self.target_loader.append(
                DataLoader(test, batch_size=16, shuffle=False, num_workers=2))
            self.target_trainset.append(train)
            self.target_trainloader.append(
                DataLoader(train, batch_size=16, shuffle=False, num_workers=2))

        self.model = DeepLab(num_classes=2,
                             backbone=config.backbone,
                             output_stride=config.out_stride,
                             sync_bn=config.sync_bn,
                             freeze_bn=config.freeze_bn)
        if cuda:
            self.checkpoint = torch.load(model_path)
        else:
            self.checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(self.checkpoint)
        self.evaluator = Evaluator(2)
        self.cuda = cuda
        if cuda:
            self.model = self.model.cuda()

    def get_performance(self, dataloader, trainloader, city):
        """Return (building accuracy, building IoU, mIoU) on `dataloader` for
        `city`, after an optional BN-adaptation pass over `trainloader`."""
        # change mean and var of bn to adapt to the target domain
        if self.bn and city != self.config.dataset:
            print('BN Adaptation on' + city)
            self.model.train()
            for sample in trainloader:
                image, target = sample['image'], sample['label']
                if self.cuda:
                    image, target = image.cuda(), target.cuda()
                with torch.no_grad():
                    output = self.model(image)

        batch = self.save_batch
        self.model.eval()
        self.evaluator.reset()
        tbar = tqdm(dataloader, desc='\r')
        # save in different directories
        if self.bn:
            save_path = os.path.join(self.save_path, city + '_bn')
        else:
            save_path = os.path.join(self.save_path, city)

        # evaluate on the test dataset
        bn = dict()
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            if self.cuda:
                image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)

            # save BN params once, on the first batch.
            # BUGFIX: the original executed an unconditional `break` right
            # after dumping the params, aborting the loop after one batch so
            # the metrics below were computed over an empty confusion matrix.
            if i == 0:
                layr = 0
                for h in self.model.modules():
                    if isinstance(h, nn.Conv2d):
                        # one Conv2d layer corresponds to each BN layer
                        bn[(layr, 'weight')] = np.squeeze(h.weight)
                    if isinstance(h, nn.BatchNorm2d):
                        bn[(layr, 'mean')] = h.running_mean
                        bn[(layr, 'var')] = h.running_var
                        layr += 1
                # BUGFIX: ensure the per-city subdirectory exists before
                # torch.save (only self.save_path was created before).
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                torch.save(bn, os.path.join(save_path, 'bnAll.pth'))

            pred = output.data.cpu().numpy()
            target = target.cpu().numpy()
            pred = np.argmax(pred, axis=1)
            # Add batch sample into evaluator
            self.evaluator.add_batch(target, pred)

            # save pictures for the first `save_batch` batches
            if batch > 0:
                if not os.path.exists(save_path):
                    os.makedirs(save_path)
                image = image.cpu().numpy() * 255
                image = image.transpose(0, 2, 3, 1).astype(int)
                imgs = self.color_images(pred, target)
                self.save_images(imgs, batch, save_path, False)
                self.save_images(image, batch, save_path, True)
                batch -= 1

        Acc = self.evaluator.Building_Acc()
        IoU = self.evaluator.Building_IoU()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        return Acc, IoU, mIoU

    def test(self):
        """Evaluate source + all target domains, print and dump results."""
        # BUGFIX: the source-domain evaluation had been commented out, leaving
        # A/I/Im undefined (NameError) in the report below; restore it.
        A, I, Im = self.get_performance(self.source_loader, None, self.config.dataset)
        tA, tI, tIm = [], [], []
        for dl, tl, city in zip(self.target_loader, self.target_trainloader, self.target):
            tA_, tI_, tIm_ = self.get_performance(dl, tl, city)
            tA.append(tA_)
            tI.append(tI_)
            tIm.append(tIm_)

        res = {}
        print("Test for source domain:")
        print("{}: Acc:{}, IoU:{}, mIoU:{}".format(self.config.dataset, A, I, Im))
        res[self.config.dataset] = {'Acc': A, 'IoU': I, 'mIoU': Im}
        print('Test for target domain:')
        for i, city in enumerate(self.target):
            print("{}: Acc:{}, IoU:{}, mIoU:{}".format(city, tA[i], tI[i], tIm[i]))
            res[city] = {'Acc': tA[i], 'IoU': tI[i], 'mIoU': tIm[i]}

        if self.bn:
            name = 'train_log/test_bn.json'
        else:
            name = 'train_log/test.json'
        with open(name, 'w') as f:
            json.dump(res, f)

    def save_images(self, imgs, batch_index, save_path, if_original=False):
        """Write a batch of images to `save_path`.

        `if_original=True` means raw input images; False means colored
        prediction maps."""
        for i, img in enumerate(imgs):
            img = img[:, :, ::-1]  # change to BGR for cv2.imwrite
            # BUGFIX: the suffixes were swapped — predictions were written as
            # '*_Original.jpg' and originals as '*_Pred.jpg'.
            if if_original:
                suffix = '_Original.jpg'
            else:
                suffix = '_Pred.jpg'
            cv2.imwrite(os.path.join(save_path, str(batch_index) + str(i) + suffix), img)

    def color_images(self, pred, target):
        """Color-code a batch of binary predictions against ground truth.

        Per pixel (bkg: negative, building: positive):
        black = true negative, red = false negative,
        green = false positive, yellow = true positive."""
        imgs = []
        for p, t in zip(pred, target):
            # encode (pred, target) pairs into 0..3: tmp = 2*pred + target
            tmp = p * 2 + t
            img = np.zeros((p.shape[0], p.shape[1], 3))
            img[np.where(tmp == 0)] = [0, 0, 0]       # Black RGB, for true negative
            img[np.where(tmp == 1)] = [255, 0, 0]     # Red RGB, for false negative
            img[np.where(tmp == 2)] = [0, 255, 0]     # Green RGB, for false positive
            img[np.where(tmp == 3)] = [255, 255, 0]   # Yellow RGB, for true positive
            imgs.append(img)
        return imgs
class Trainer:
    """Train/validate/test harness around externally constructed datasets,
    model and saver; tracks the best validation mIoU for checkpointing."""

    def __init__(self, args, model, train_set, val_set, test_set, class_weights, saver):
        self.args = args
        self.saver = saver
        self.saver.save_experiment_config()

        self.train_dataloader = DataLoader(train_set, batch_size=args.batch_size,
                                           shuffle=True, num_workers=args.workers)
        self.val_dataloader = DataLoader(val_set, batch_size=args.batch_size,
                                         shuffle=False, num_workers=args.workers)
        self.test_dataloader = DataLoader(test_set, batch_size=args.batch_size,
                                          shuffle=False, num_workers=args.workers)

        self.train_summary = TensorboardSummary(os.path.join(self.saver.experiment_dir, "train"))
        self.train_writer = self.train_summary.create_summary()
        self.val_summary = TensorboardSummary(os.path.join(self.saver.experiment_dir, "validation"))
        self.val_writer = self.val_summary.create_summary()

        self.model = model
        self.dataset_size = {'train': len(train_set), 'val': len(val_set), 'test': len(test_set)}

        # backbone gets the base lr, the head 10x
        train_params = [{'params': model.get_1x_lr_params(), 'lr': args.lr},
                        {'params': model.get_10x_lr_params(), 'lr': args.lr * 10}]

        if args.use_balanced_weights:
            weight = torch.from_numpy(class_weights.astype(np.float32))
        else:
            weight = None

        if args.optimizer == 'SGD':
            print('Using SGD')
            self.optimizer = torch.optim.SGD(train_params,
                                             momentum=args.momentum,
                                             weight_decay=args.weight_decay,
                                             nesterov=args.nesterov)
        elif args.optimizer == 'Adam':
            print('Using Adam')
            self.optimizer = torch.optim.Adam(train_params, weight_decay=args.weight_decay)
        else:
            raise NotImplementedError

        self.lr_scheduler = None
        if args.use_lr_scheduler:
            if args.lr_scheduler == 'step':
                print('Using step lr scheduler')
                self.lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
                    self.optimizer,
                    milestones=[int(x) for x in args.step_size.split(",")],
                    gamma=0.1)

        self.criterion = SegmentationLosses(weight=weight, ignore_index=255,
                                            cuda=args.cuda).build_loss(mode=args.loss_type)
        self.evaluator = Evaluator(train_set.num_classes)
        self.best_pred = 0.0

    def training(self, epoch):
        """Run one training epoch, logging per-iteration and per-epoch loss."""
        train_loss = 0.0
        self.model.train()
        num_img_tr = len(self.train_dataloader)
        tbar = tqdm(self.train_dataloader, desc='\r')
        # pick one random batch per epoch to visualize
        visualization_index = int(random.random() * len(self.train_dataloader))
        vis_img, vis_tgt, vis_out = None, None, None
        self.train_writer.add_scalar('learning_rate', get_learning_rate(self.optimizer), epoch)
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            image, target = image.cuda(), target.cuda()
            self.optimizer.zero_grad()
            output = self.model(image)
            loss = self.criterion(output, target)
            loss.backward()
            self.optimizer.step()
            train_loss += loss.item()
            tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1)))
            self.train_writer.add_scalar('total_loss_iter', loss.item(), i + num_img_tr * epoch)
            if i == visualization_index:
                vis_img, vis_tgt, vis_out = image, target, output

        self.train_writer.add_scalar('total_loss_epoch',
                                     train_loss / self.dataset_size['train'], epoch)
        if constants.VISUALIZATION:
            self.train_summary.visualize_state(self.train_writer, self.args.dataset,
                                               vis_img, vis_tgt, vis_out, epoch)
        print('[Epoch: %d, numImages: %5d]' % (epoch, i * self.args.batch_size + image.data.shape[0]))
        print('Loss: %.3f' % train_loss)
        print('BestPred: %.3f' % self.best_pred)

    def validation(self, epoch, test=False):
        """Evaluate on the validation set (or the test set when `test=True`).

        Returns (loss, mIoU, mIoU_20, Acc, Acc_class, FWIoU); updates the best
        checkpoint only in validation mode."""
        self.model.eval()
        self.evaluator.reset()
        loader = self.test_dataloader if test else self.val_dataloader
        tbar = tqdm(loader, desc='\r')
        test_loss = 0.0
        # BUGFIX: draw the visualization index from the loader actually being
        # iterated; the original always used len(self.val_dataloader), which is
        # wrong in test mode.
        visualization_index = int(random.random() * len(loader))
        vis_img, vis_tgt, vis_out = None, None, None
        for i, sample in enumerate(tbar):
            image, target = sample['image'], sample['label']
            image, target = image.cuda(), target.cuda()
            with torch.no_grad():
                output = self.model(image)
            if i == visualization_index:
                vis_img, vis_tgt, vis_out = image, target, output
            loss = self.criterion(output, target)
            test_loss += loss.item()
            tbar.set_description('Test loss: %.3f' % (test_loss / (i + 1)))
            pred = torch.argmax(output, dim=1).data.cpu().numpy()
            target = target.cpu().numpy()
            self.evaluator.add_batch(target, pred)

        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        mIoU_20 = self.evaluator.Mean_Intersection_over_Union_20()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        if not test:
            self.val_writer.add_scalar('total_loss_epoch',
                                       test_loss / self.dataset_size['val'], epoch)
            self.val_writer.add_scalar('mIoU', mIoU, epoch)
            self.val_writer.add_scalar('mIoU_20', mIoU_20, epoch)
            self.val_writer.add_scalar('Acc', Acc, epoch)
            self.val_writer.add_scalar('Acc_class', Acc_class, epoch)
            self.val_writer.add_scalar('fwIoU', FWIoU, epoch)
            if constants.VISUALIZATION:
                self.val_summary.visualize_state(self.val_writer, self.args.dataset,
                                                 vis_img, vis_tgt, vis_out, epoch)

        print("Test: " if test else "Validation:")
        print('[Epoch: %d, numImages: %5d]' % (epoch, i * self.args.batch_size + image.data.shape[0]))
        print("Acc:{}, Acc_class:{}, mIoU:{}, mIoU_20:{}, fwIoU: {}".format(Acc, Acc_class, mIoU, mIoU_20, FWIoU))
        print('Loss: %.3f' % test_loss)

        if not test:
            new_pred = mIoU
            if new_pred > self.best_pred:
                self.best_pred = new_pred
                self.saver.save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    'best_pred': self.best_pred,
                })
        return test_loss, mIoU, mIoU_20, Acc, Acc_class, FWIoU

    def load_best_checkpoint(self):
        """Restore model/optimizer from the saver's best checkpoint; return its epoch."""
        checkpoint = self.saver.load_checkpoint()
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        # BUGFIX: the log message carried a stray closing parenthesis
        print(f'=> loaded checkpoint - epoch {checkpoint["epoch"]}')
        return checkpoint["epoch"]