def __init__(self, cfg):
    """Build the Semantic3D point-cloud training pipeline from a config.

    Sets up the CUDA device, train/test transforms, dataset + dataloaders,
    per-cloud probability buffers, the model, SGD optimizer, exponential LR
    scheduler and the running segmentation metric.
    """
    self.cfg = cfg
    self.device = torch.device(cfg.device)
    torch.cuda.set_device(cfg.device_id)
    # The room version dataset
    # Train-time augmentations; AddFeatsByKeys packs 'pos' and 'rgb' into x
    # and (per delete_feats=[False, True]) drops the raw 'rgb' entry after.
    self.train_transform = Compose([
        RandomRotate(degrees=180, axis=2),
        RandomScaleAnisotropic(scales=[0.8, 1.2], anisotropic=True),
        RandomSymmetry(axis=[True, False, False]),
        RandomNoise(sigma=0.001),
        DropFeature(drop_proba=0.2, feature_name='rgb'),
        AddFeatsByKeys(list_add_to_x=[True, True],
                       feat_names=['pos', 'rgb'],
                       delete_feats=[False, True])
    ])
    # Test-time: no augmentation, only the same feature packing.
    self.test_transform = Compose([
        AddFeatsByKeys(list_add_to_x=[True, True],
                       feat_names=['pos', 'rgb'],
                       delete_feats=[False, True])
    ])
    self.dataset = Semantic3DWholeDataset(
        root=cfg.root,
        grid_size=cfg.grid_size,
        num_points=cfg.sample_num,
        train_sample_per_epoch=cfg.train_samples_per_epoch,
        test_sample_per_epoch=cfg.test_samples_per_epoch,
        train_transform=self.train_transform,
        test_transform=self.test_transform)
    self.dataset.create_dataloader(batch_size=cfg.batch_size,
                                   shuffle=True,
                                   num_workers=0,
                                   precompute_multi_scale=True,
                                   num_scales=5)
    # One (points_in_cloud, num_classes) float32 buffer per validation cloud,
    # for accumulating per-point class probabilities during evaluation.
    self.test_probs = [
        np.zeros(shape=(t.data.shape[0], cfg.num_classes), dtype=np.float32)
        for t in self.dataset.val_set.input_trees
    ]
    # Model class is resolved by name from the models module.
    # in_channels=6 presumably corresponds to xyz + rgb — TODO confirm.
    self.model = getattr(models, cfg.model_name)(in_channels=6,
                                                 n_classes=cfg.num_classes,
                                                 use_crf=cfg.use_crf,
                                                 steps=cfg.steps)
    # self.optimizer = torch.optim.Adam(params=self.model.parameters(),
    #                                   lr=cfg.lr,
    #                                   weight_decay=cfg.weight_decay)
    self.optimizer = torch.optim.SGD(params=self.model.parameters(),
                                     lr=cfg.lr,
                                     momentum=cfg.momentum,
                                     weight_decay=cfg.weight_decay)
    self.scheduler = torch.optim.lr_scheduler.ExponentialLR(
        self.optimizer, gamma=cfg.gamma)
    self.metrics = runningScore(cfg.num_classes, ignore_index=cfg.ignore_index)
def _train_epoch(self, epoch):
    """Run one training epoch for the shrink-map text-detection model.

    Returns a dict with the averaged train loss, last learning rate,
    epoch wall time and the epoch index.
    """
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    # Binary (text / non-text) confusion-matrix accumulator for this epoch.
    running_metric_text = runningScore(2)
    lr = self.optimizer.param_groups[0]['lr']
    for i, batch in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        lr = self.optimizer.param_groups[0]['lr']
        # Convert the data and move it onto the GPU / training device.
        for key, value in batch.items():
            if value is not None:
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
        cur_batch_size = batch['img'].size()[0]
        preds = self.model(batch['img'])
        loss_dict = self.criterion(preds, batch)
        # backward
        self.optimizer.zero_grad()
        loss_dict['loss'].backward()
        self.optimizer.step()
        # Scheduler is stepped per batch, not per epoch.
        self.scheduler.step()
        # acc / iou on the shrink map (channel 0 of the predictions).
        score_shrink_map = cal_text_score(
            preds[:, 0, :, :], batch['shrink_map'], batch['shrink_mask'],
            running_metric_text,
            thred=self.config['post_processing']['args']['thresh'])
        # Record loss and accuracy to the log.
    # NOTE: loss_dict values are converted in place from tensors to floats
    # below, so subsequent reads (e.g. train_loss) see plain Python numbers.
        loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
        for idx, (key, value) in enumerate(loss_dict.items()):
            loss_dict[key] = value.item()
            if key == 'loss':
                continue
            loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
            if idx < len(loss_dict) - 1:
                loss_str += ', '
        train_loss += loss_dict['loss']
        acc = score_shrink_map['Mean Acc']
        iou_shrink_map = score_shrink_map['Mean IoU']
        if self.global_step % self.log_iter == 0:
            batch_time = time.time() - batch_start
            self.logger_info(
                '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}lr:{:.6}, time:{:.2f}'.format(
                    epoch, self.epochs, i + 1, self.train_loader_len,
                    self.global_step,
                    self.log_iter * cur_batch_size / batch_time, acc,
                    iou_shrink_map, loss_str, lr, batch_time))
            batch_start = time.time()
    return {'train_loss': train_loss / self.train_loader_len,
            'lr': lr,
            'time': time.time() - epoch_start,
            'epoch': epoch}
def __init__(self, args):
    """Set up the supervised image->segmentation trainer (ResNet Gsi).

    Builds the Gsi generator, loss, optimizer, tensorboard writer and running
    metrics, then tries to resume from the latest supervised checkpoint.

    Args:
        args: parsed CLI namespace (dataset, ngf, norm, no_dropout, gpu_ids,
              lr, checkpoint_dir, ...).
    """
    if args.dataset == 'voc2012':
        self.n_channels = 21
    elif args.dataset == 'cityscapes':
        self.n_channels = 20
    elif args.dataset == 'acdc':
        self.n_channels = 4
    else:
        # Fail fast: previously an unknown dataset left n_channels unset and
        # crashed later with a confusing AttributeError.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))

    # Define the network (image -> segmentation).
    self.Gsi = define_Gen(input_nc=3,
                          output_nc=self.n_channels,
                          ngf=args.ngf,
                          netG='resnet_9blocks_softmax',
                          norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)  # for image to segmentation
    utils.print_networks([self.Gsi], ['Gsi'])

    self.CE = nn.CrossEntropyLoss()
    self.activation_softmax = nn.Softmax2d()
    self.gsi_optimizer = torch.optim.Adam(self.Gsi.parameters(),
                                          lr=args.lr, betas=(0.9, 0.999))

    ### writer for tensorboard
    self.writer_supervised = SummaryWriter(tensorboard_loc + '_supervised')
    self.running_metrics_val = utils.runningScore(self.n_channels,
                                                  args.dataset)
    self.args = args

    if not os.path.isdir(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    try:
        ckpt = utils.load_checkpoint('%s/latest_supervised_model.ckpt' %
                                     (args.checkpoint_dir))
        self.start_epoch = ckpt['epoch']
        self.Gsi.load_state_dict(ckpt['Gsi'])
        self.gsi_optimizer.load_state_dict(ckpt['gsi_optimizer'])
        self.best_iou = ckpt['best_iou']
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer silently treated as "no checkpoint".
        print(' [*] No checkpoint!')
        self.start_epoch = 0
        self.best_iou = -100
def __init__(self, cfg):
    """Set up a plain U-Net segmentation trainer from the config tree.

    Builds the model, train/val dataloaders, class-weighted cross-entropy
    loss, Adam optimizer with polynomial LR decay, logger and metrics, and
    optionally resumes from a numbered checkpoint.
    """
    self.cfg = cfg
    self.Image_generator = U_Net(in_ch=3, out_ch=cfg.DATASET.N_CLASS,
                                 norm=torch.nn.BatchNorm2d, side='no')
    train_dataset = BaseDataset(cfg, split='train')
    valid_dataset = BaseDataset(cfg, split='val')
    self.train_dataloader = data.DataLoader(train_dataset,
                                            batch_size=cfg.DATASET.BATCHSIZE,
                                            num_workers=8,
                                            shuffle=True,
                                            drop_last=True)
    self.valid_dataloader = data.DataLoader(valid_dataset,
                                            batch_size=cfg.DATASET.BATCHSIZE,
                                            num_workers=8,
                                            shuffle=True,
                                            drop_last=True)
    # Per-class weights hard-code 9 classes; presumably matched to
    # DATASET.N_CLASS — TODO confirm they stay in sync.
    self.criterion = torch.nn.CrossEntropyLoss(
        ignore_index=self.cfg.LOSS.IGNORE_INDEX,
        weight=torch.tensor([1, 0.5, 0.5, 1, 3, 1, 1, 1, 1]).cuda())
    self.ckpt_outdir = os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints')
    if not os.path.isdir(self.ckpt_outdir):
        os.mkdir(self.ckpt_outdir)
    self.val_outdir = os.path.join(cfg.TRAIN.OUTDIR, 'val')
    if not os.path.isdir(self.val_outdir):
        os.mkdir(self.val_outdir)
    # TRAIN.RESUME doubles as the resume epoch index; negative means
    # "train from scratch" (see the check below).
    self.start_epoch = cfg.TRAIN.RESUME
    self.n_epoch = cfg.TRAIN.N_EPOCH
    self.optimizer = torch.optim.Adam(
        [{'params': self.Image_generator.parameters()}],
        lr=cfg.OPTIMIZER.G_LR,
        betas=(cfg.OPTIMIZER.BETA1, cfg.OPTIMIZER.BETA2),
        weight_decay=cfg.OPTIMIZER.WEIGHT_DECAY)
    # Polynomial LR decay (power 0.9) over the whole training schedule.
    iter_per_epoch = len(train_dataset) // cfg.DATASET.BATCHSIZE
    lambda_poly = lambda iters: pow(
        (1.0 - iters / (cfg.TRAIN.N_EPOCH * iter_per_epoch)), 0.9)
    self.scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer,
                                                       lr_lambda=lambda_poly,)
    self.logger = logger(cfg.TRAIN.OUTDIR, name='train')
    self.running_metrics = runningScore(n_classes=cfg.DATASET.N_CLASS)
    if self.start_epoch >= 0:
        self.Image_generator.load_state_dict(
            torch.load(os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                                    '{}epoch.pth'.format(self.start_epoch)))['model'])
        self.optimizer.load_state_dict(
            torch.load(os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                                    '{}epoch.pth'.format(self.start_epoch)))['optimizer'])
        log = "Using the {}th checkpoint".format(self.start_epoch)
        self.logger.info(log)
    self.Image_generator = self.Image_generator.cuda()
    self.criterion = self.criterion.cuda()
def _eval(self, epoch): self.model.eval() # torch.cuda.empty_cache() # speed up evaluating after training finished total_frame = 0.0 total_time = 0.0 running_metric_melons = runningScore(3) mean_acc = [] mean_iou = [] for i, batch in tqdm(enumerate(self.validate_loader), total=len(self.validate_loader), desc='test model'): with torch.no_grad(): # 数据进行转换和丢到gpu for key, value in batch.items(): if value is not None: if isinstance(value, torch.Tensor): batch[key] = value.to(self.device) start = time.time() # print(batch['img'].shape) # exit() preds = self.model(batch['img']) if isinstance(preds, tuple): preds = preds[0] target = batch['label'] h, w = target.size(1), target.size(2) scale_pred = F.interpolate(input=preds, size=(h, w), mode='bilinear', align_corners=True) label_preds = torch.argmax(scale_pred, dim=1) running_metric_melons.update(target.data.cpu().numpy(), label_preds.data.cpu().numpy()) score_, _ = running_metric_melons.get_scores() total_time += time.time() - start total_frame += batch['img'].size()[0] acc = score_['Mean Acc'] iou_Mean_map = score_['Mean IoU'] mean_acc.append(acc) mean_iou.append(iou_Mean_map) print('FPS:{}'.format(total_frame / total_time)) return np.array(mean_acc).mean(), np.array(mean_iou).mean()
def _train_epoch(self, epoch):
    """Run one PAN-style text/kernel training epoch.

    Computes the combined loss (text, kernel, aggregation, discrimination),
    tracks text/kernel accuracy and IoU, logs periodically, and writes
    scalars/images to tensorboard when enabled. Returns summary stats.
    """
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    # Separate binary accumulators for the text map and the kernel map.
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    lr = self.optimizer.param_groups[0]['lr']
    for i, (images, labels, training_masks) in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        lr = self.optimizer.param_groups[0]['lr']
        # Convert the data and move it onto the GPU / training device.
        cur_batch_size = images.size()[0]
        images, labels, training_masks = images.to(self.device), labels.to(
            self.device), training_masks.to(self.device)
        preds = self.model(images)
        loss_all, loss_tex, loss_ker, loss_agg, loss_dis = self.criterion(
            preds, labels, training_masks)
        # backward
        self.optimizer.zero_grad()
        loss_all.backward()
        self.optimizer.step()
        # Polynomial schedules step per batch; other schedulers are assumed
        # to be stepped elsewhere (per epoch) — confirm against caller.
        if self.config['lr_scheduler']['type'] == 'PolynomialLR':
            self.scheduler.step()
        # acc / iou: channel 0 is the text map, channel 1 the kernel map.
        score_text = cal_text_score(preds[:, 0, :, :], labels[:, 0, :, :],
                                    training_masks, running_metric_text)
        score_kernel = cal_kernel_score(preds[:, 1, :, :], labels[:, 1, :, :],
                                        labels[:, 0, :, :], training_masks,
                                        running_metric_kernel)
        # Record loss and accuracy to the log.
        loss_all = loss_all.item()
        loss_tex = loss_tex.item()
        loss_ker = loss_ker.item()
        loss_agg = loss_agg.item()
        loss_dis = loss_dis.item()
        train_loss += loss_all
        acc = score_text['Mean Acc']
        iou_text = score_text['Mean IoU']
        iou_kernel = score_kernel['Mean IoU']
        if (i + 1) % self.display_interval == 0:
            batch_time = time.time() - batch_start
            self.logger.info(
                '[{}/{}], [{}/{}], global_step: {}, Speed: {:.1f} samples/sec, acc: {:.4f}, iou_text: {:.4f}, iou_kernel: {:.4f}, loss_all: {:.4f}, loss_tex: {:.4f}, loss_ker: {:.4f}, loss_agg: {:.4f}, loss_dis: {:.4f}, lr:{:.6}, time:{:.2f}'
                .format(
                    epoch, self.epochs, i + 1, self.train_loader_len,
                    self.global_step,
                    self.display_interval * cur_batch_size / batch_time, acc,
                    iou_text, iou_kernel, loss_all, loss_tex, loss_ker,
                    loss_agg, loss_dis, lr, batch_time))
            batch_start = time.time()
        if self.tensorboard_enable:
            # write tensorboard
            self.writer.add_scalar('TRAIN/LOSS/loss_all', loss_all, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_tex', loss_tex, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_ker', loss_ker, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_agg', loss_agg, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_dis', loss_dis, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_text', iou_text, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_kernel', iou_kernel, self.global_step)
            self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
            if i % self.show_images_interval == 0:
                # show images on tensorboard
                self.writer.add_images('TRAIN/imgs', images, self.global_step)
                # text kernel and training_masks
                # NOTE: thresholding below mutates `labels` in place.
                gt_texts, gt_kernels = labels[:, 0, :, :], labels[:, 1, :, :]
                gt_texts[gt_texts <= 0.5] = 0
                gt_texts[gt_texts > 0.5] = 1
                gt_kernels[gt_kernels <= 0.5] = 0
                gt_kernels[gt_kernels > 0.5] = 1
                show_label = torch.cat(
                    [gt_texts, gt_kernels, training_masks.float()])
                show_label = vutils.make_grid(show_label.unsqueeze(1),
                                              nrow=cur_batch_size,
                                              normalize=False,
                                              padding=20,
                                              pad_value=1)
                self.writer.add_image('TRAIN/gt', show_label, self.global_step)
                # model output (sigmoid applied in place for visualization)
                preds[:, :2, :, :] = torch.sigmoid(preds[:, :2, :, :])
                show_pred = torch.cat(
                    [preds[:, 0, :, :], preds[:, 1, :, :]])
                show_pred = vutils.make_grid(show_pred.unsqueeze(1),
                                             nrow=cur_batch_size,
                                             normalize=False,
                                             padding=20,
                                             pad_value=1)
                self.writer.add_image('TRAIN/preds', show_pred, self.global_step)
    return {
        'train_loss': train_loss / self.train_loader_len,
        'lr': lr,
        'time': time.time() - epoch_start,
        'epoch': epoch
    }
def train_net(sequence, orientation, root_dir, model_name, net, n_classes, csv_path,
              epochs=5, batch_size=4, lr=0.1, cp=True, gpu=False, if_clahe=False,
              if_gamma_correction=False, if_mip=False, dir_checkpoints='models/'):
    """Train an atria segmentation + post-ablation classification network.

    Runs multi-scale training (one pass per image size in IMAGE_SIZE_LIST per
    epoch), evaluates after each size pass, and checkpoints the best Mean IoU.
    Relies on the module-level `options`, `writer` and `IMAGE_SIZE_LIST`.
    """
    global IMAGE_SIZE_LIST
    image_size_list = IMAGE_SIZE_LIST
    # set up paramerter
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.99, weight_decay=0.0005)
    best_iou = -100.0
    start_epoch = 0
    if not os.path.exists(dir_checkpoints):
        os.mkdir(dir_checkpoints)
    print('''
    Starting training:
        Model_name:{}
        Epochs: {}
        Batch size: {}
        Learning rate: {}
    '''.format(model_name, epochs, batch_size, lr))
    running_metrics = runningScore(n_classes)
    scheduler = get_scheduler(optimizer, lr_policy='step', lr_decay_iters=50)
    ## if disk exists model
    # NOTE(review): os.path.join always returns a string, so `resume_path`
    # is never None and the `else: init_weights(...)` branch is dead code.
    resume_path = os.path.join(dir_checkpoints, options.model_name + '.pkl')
    if resume_path is not None:
        if os.path.isfile(resume_path):
            print("Loading model and optimizer from checkpoint '{}'".format(resume_path))
            checkpoint = torch.load(resume_path)
            # assumes net is wrapped in DataParallel (net.module) — confirm.
            net.module.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {})"
                  .format(resume_path, checkpoint['epoch']))
            start_epoch = int(checkpoint['epoch'])
        else:
            print("No checkpoint found at '{}'".format(resume_path))
    else:
        init_weights(net, init_type='kaiming')
    temp_batch_size = batch_size
    for epoch in range(start_epoch, epochs):
        scheduler.step(epoch)
        total = 0.
        acc = 0.
        for size in image_size_list:
            # Pick the crop size and shrink the batch for large inputs.
            if orientation == 0:
                w = h = size
                if h >= 480 or w >= 480:
                    ##for memomry concern
                    batch_size = int(temp_batch_size / 2)
                    if h >= 600 or w >= 600:
                        batch_size = int(temp_batch_size / 4)
                else:
                    batch_size = int(temp_batch_size)
            else:
                # Non-axial orientation: fixed height of 96.
                h = 96
                w = size
                batch_size = temp_batch_size
            if batch_size == 0:
                batch_size = 1
            # Setup Dataloader
            train_dataset = AtriaDataset(root_dir, if_subsequent=sequence, sequence_length=options.sequence_length, split='train', extra_label_csv_path=csv_path, extra_label=True, augmentation=True, input_h=h, input_w=w, preload_data=False, if_clahe=if_clahe, if_gamma_correction=if_gamma_correction, if_mip=if_mip, orientation=orientation)
            train_loader = DataLoader(dataset=train_dataset, num_workers=16, batch_size=batch_size, shuffle=True)
            test_dataset = AtriaDataset(root_dir, if_subsequent=sequence, split='validate', sequence_length=options.sequence_length, extra_label_csv_path=csv_path, extra_label=True, augmentation=True, input_h=h, input_w=w, preload_data=True, if_clahe=if_clahe, if_gamma_correction=if_gamma_correction, if_mip=if_mip, orientation=orientation)
            test_loader = DataLoader(dataset=test_dataset, num_workers=16, batch_size=batch_size, shuffle=True)
            print('''
            Starting training:
                model_name:{}
                lr: {}
                Image size: {}
                Training size: {}
                Validation size: {}
                Checkpoints: {}
                CUDA: {}
            '''.format(model_name, str(scheduler.get_lr()), size, train_dataset.get_size(), test_dataset.get_size(), str(cp), str(gpu)))
            net.train()
            print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
            for epoch_iter, data in tqdm(enumerate(train_loader, 1), total=len(train_loader)):
                images = data['input']
                labels = data['target']
                gt_pa_label = data['post_ablation']
                if gpu:
                    images = Variable(images.cuda())
                    labels = Variable(labels.cuda())
                    gt_pa_label = Variable(gt_pa_label.cuda())
                else:
                    images = Variable(images)
                    labels = Variable(labels)
                    gt_pa_label = Variable(gt_pa_label)
                optimizer.zero_grad()
                if isinstance(net, torch.nn.DataParallel):
                    name = net.module.get_net_name()
                else:
                    name = net.get_net_name()
                # NOTE(review): prints every iteration — probably debug output.
                print('network', name)
                # outputs[0]: segmentation logits, outputs[1]: classifier head.
                outputs = net(images)
                loss = cross_entropy_2D(input=outputs[0], target=labels)
                # NOTE(review): sigmoid before CrossEntropyLoss is unusual —
                # CE expects raw logits (it applies log_softmax itself); confirm.
                logits = F.sigmoid(outputs[1])
                classifier_fn = nn.CrossEntropyLoss()
                cs_loss = classifier_fn(input=logits, target=gt_pa_label)
                loss += cs_loss
                loss.backward()
                optimizer.step()
                if (epoch_iter + 1) % 20 == 0:
                    print("Epoch [%d/%d] Loss: %.4f" % (epoch + 1, epochs, loss.item()))
                    writer.add_scalar(options.model_name + '/loss', loss.item(), epoch + 1)
            # Validation pass for this image size.
            net.eval()
            for i_val, data in tqdm(enumerate(test_loader)):
                images_val = data['input']
                labels_val = data['target']
                gt_pa_label = data['post_ablation']
                if gpu:
                    images_val = Variable(images_val.cuda())
                    labels_val = Variable(labels_val.cuda())
                else:
                    images_val = Variable(images_val)
                    labels_val = Variable(labels_val)
                with torch.no_grad():
                    outputs = net(images_val)
                    # NOTE(review): `sum` shadows the builtin here.
                    sum, count = cal_cls_acc(outputs[1], gt_pa_label)
                    total += count
                    acc += sum
                    pred = outputs[0].data.max(1)[1].cpu().numpy()
                    gt = labels_val.data.cpu().numpy()
                    ## segmentation result evaluate
                    running_metrics.update(gt, pred)
            score, class_iou = running_metrics.get_scores()
            for k, v in score.items():
                print(k, v)
            writer.add_scalars(options.model_name + '/scalar_group', score, epoch + 1)
            print('classification acc:', 100 * acc / (1.0 * total))
            writer.add_scalar(options.model_name + '/classification', 100 * acc / (1.0 * total), epoch + 1)
            acc = 0.
            total = 0.
            running_metrics.reset()
            # Keep only the best checkpoint by Mean IoU (key includes ': \t'
            # because runningScore formats its score names that way).
            if score['Mean IoU : \t'] >= best_iou:
                best_iou = score['Mean IoU : \t']
                state = {'epoch': epoch + 1,
                         'model_state': net.module.state_dict(),
                         'optimizer_state': optimizer.state_dict(),
                         'if_mip': if_mip,
                         'if_gamma': if_gamma_correction,
                         'if_clahe': if_clahe,
                         'orientation': options.orientation,
                         'sequence_length': options.sequence_length,
                         'upsample_type': options.upsample_type
                         }
                torch.save(state, os.path.join(dir_checkpoints, model_name + ".pkl"))
def __init__(self, args):
    """Set up the supervised image->segmentation trainer (DeepLab Gsi).

    Builds the Gsi generator, optionally seeds it with pretrained weights
    (VOC/cityscapes only), creates loss/optimizer/metrics and interpolators,
    then tries to resume from the latest supervised checkpoint.

    Args:
        args: parsed CLI namespace (dataset, ngf, norm, no_dropout, gpu_ids,
              crop_height, crop_width, lr, checkpoint_dir, ...).
    """
    if args.dataset == 'voc2012':
        self.n_channels = 21
    elif args.dataset == 'cityscapes':
        self.n_channels = 20
    elif args.dataset == 'acdc':
        self.n_channels = 4
    else:
        # Fail fast: previously an unknown dataset left n_channels unset and
        # crashed later with a confusing AttributeError.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))

    # Define the network (image -> segmentation).
    self.Gsi = define_Gen(input_nc=3,
                          output_nc=self.n_channels,
                          ngf=args.ngf,
                          netG='deeplab',
                          norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)  # for image to segmentation

    ### Now we put in the pretrained weights in Gsi
    ### These will only be used in the case of VOC and cityscapes
    if args.dataset != 'acdc':
        saved_state_dict = torch.load(pretrained_loc)
        new_params = self.Gsi.state_dict().copy()
        for name, param in new_params.items():
            if name in saved_state_dict and param.size() == saved_state_dict[name].size():
                # copy_() writes in place; the state_dict tensors presumably
                # alias the live parameters, which would explain why
                # load_state_dict below is commented out — confirm.
                new_params[name].copy_(saved_state_dict[name])
        # self.Gsi.load_state_dict(new_params)

    utils.print_networks([self.Gsi], ['Gsi'])

    ### Defining an interpolation function so as to match the output of
    ### network to feature map size
    self.interp = nn.Upsample(size=(args.crop_height, args.crop_width),
                              mode='bilinear', align_corners=True)
    self.interp_val = nn.Upsample(size=(512, 512),
                                  mode='bilinear', align_corners=True)

    self.CE = nn.CrossEntropyLoss()
    self.activation_softmax = nn.Softmax2d()
    self.gsi_optimizer = torch.optim.Adam(self.Gsi.parameters(),
                                          lr=args.lr, betas=(0.9, 0.999))

    ### writer for tensorboard
    self.writer_supervised = SummaryWriter(tensorboard_loc + '_supervised')
    self.running_metrics_val = utils.runningScore(self.n_channels,
                                                  args.dataset)
    self.args = args

    if not os.path.isdir(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    try:
        ckpt = utils.load_checkpoint('%s/latest_supervised_model.ckpt' %
                                     (args.checkpoint_dir))
        self.start_epoch = ckpt['epoch']
        self.Gsi.load_state_dict(ckpt['Gsi'])
        self.gsi_optimizer.load_state_dict(ckpt['gsi_optimizer'])
        self.best_iou = ckpt['best_iou']
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer silently treated as "no checkpoint".
        print(' [*] No checkpoint!')
        self.start_epoch = 0
        self.best_iou = -100
def __init__(self, args):
    """Set up the semi-supervised CycleGAN trainer (DeepLab generators).

    Builds both generators (Gis: seg->image, Gsi: image->seg), the pixel
    discriminators, frozen "old" networks for the Arnab consistency loss,
    losses, optimizers with linear LR decay, and tries to resume from the
    latest semi-supervised checkpoint.

    Args:
        args: parsed CLI namespace (dataset, ngf, ndf, norm, no_dropout,
              gpu_ids, crop_height, crop_width, lr, epochs, decay_epoch,
              checkpoint_dir, ...).
    """
    if args.dataset == 'voc2012':
        self.n_channels = 21
    elif args.dataset == 'cityscapes':
        self.n_channels = 20
    elif args.dataset == 'acdc':
        self.n_channels = 4
    else:
        # Fail fast: previously an unknown dataset left n_channels unset and
        # crashed later with a confusing AttributeError.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))

    # Define the network
    #####################################################
    # for segmentaion to image
    self.Gis = define_Gen(input_nc=self.n_channels, output_nc=3, ngf=args.ngf,
                          netG='deeplab', norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)
    # for image to segmentation
    self.Gsi = define_Gen(input_nc=3, output_nc=self.n_channels, ngf=args.ngf,
                          netG='deeplab', norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)
    self.Di = define_Dis(input_nc=3, ndf=args.ndf, netD='pixel', n_layers_D=3,
                         norm=args.norm, gpu_ids=args.gpu_ids)
    self.Ds = define_Dis(input_nc=self.n_channels, ndf=args.ndf, netD='pixel',
                         n_layers_D=3, norm=args.norm,
                         gpu_ids=args.gpu_ids)  # for voc 2012, there are 21 classes

    # "old_*" networks: ResNet-based copies, presumably frozen references for
    # the Arnab consistency loss (see the voc2012 checkpoint load below).
    self.old_Gis = define_Gen(input_nc=self.n_channels, output_nc=3,
                              ngf=args.ngf, netG='resnet_9blocks',
                              norm=args.norm,
                              use_dropout=not args.no_dropout,
                              gpu_ids=args.gpu_ids)
    self.old_Gsi = define_Gen(input_nc=3, output_nc=self.n_channels,
                              ngf=args.ngf, netG='resnet_9blocks_softmax',
                              norm=args.norm,
                              use_dropout=not args.no_dropout,
                              gpu_ids=args.gpu_ids)
    self.old_Di = define_Dis(input_nc=3, ndf=args.ndf, netD='pixel',
                             n_layers_D=3, norm=args.norm,
                             gpu_ids=args.gpu_ids)

    ### To put the pretrained weights in Gis and Gsi
    # if args.dataset != 'acdc':
    #     saved_state_dict = torch.load(pretrained_loc)
    #     new_params_Gsi = self.Gsi.state_dict().copy()
    #     # new_params_Gis = self.Gis.state_dict().copy()
    #     for name, param in new_params_Gsi.items():
    #         if name in saved_state_dict and param.size() == saved_state_dict[name].size():
    #             new_params_Gsi[name].copy_(saved_state_dict[name])
    #     self.Gsi.load_state_dict(new_params_Gsi)
    #     for name, param in new_params_Gis.items():
    #         if name in saved_state_dict and param.size() == saved_state_dict[name].size():
    #             new_params_Gis[name].copy_(saved_state_dict[name])
    #     # self.Gis.load_state_dict(new_params_Gis)

    ### This is just so as to get pretrained methods for the case of Gis
    if args.dataset == 'voc2012':
        try:
            ckpt_for_Arnab_loss = utils.load_checkpoint(
                './ckpt_for_Arnab_loss.ckpt')
            self.old_Gis.load_state_dict(ckpt_for_Arnab_loss['Gis'])
            self.old_Gsi.load_state_dict(ckpt_for_Arnab_loss['Gsi'])
        except Exception:
            # Narrowed from a bare `except:` (was swallowing SystemExit too).
            print('**There is an error in loading the ckpt_for_Arnab_loss**')

    utils.print_networks([self.Gsi], ['Gsi'])
    utils.print_networks([self.Gis, self.Gsi, self.Di, self.Ds],
                         ['Gis', 'Gsi', 'Di', 'Ds'])
    self.args = args

    ### interpolation
    self.interp = nn.Upsample((args.crop_height, args.crop_width),
                              mode='bilinear', align_corners=True)

    self.MSE = nn.MSELoss()
    self.L1 = nn.L1Loss()
    self.CE = nn.CrossEntropyLoss()
    self.activation_softmax = nn.Softmax2d()
    self.activation_tanh = nn.Tanh()
    self.activation_sigmoid = nn.Sigmoid()

    ### Tensorboard writer
    self.writer_semisuper = SummaryWriter(tensorboard_loc + '_semisuper')
    self.running_metrics_val = utils.runningScore(self.n_channels,
                                                  args.dataset)

    ### For adding gaussian noise
    self.gauss_noise = utils.GaussianNoise(sigma=0.2)

    # Optimizers
    #####################################################
    self.g_optimizer = torch.optim.Adam(itertools.chain(
        self.Gis.parameters(), self.Gsi.parameters()),
        lr=args.lr, betas=(0.5, 0.999))
    self.d_optimizer = torch.optim.Adam(itertools.chain(
        self.Di.parameters(), self.Ds.parameters()),
        lr=args.lr, betas=(0.5, 0.999))

    self.g_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        self.g_optimizer,
        lr_lambda=utils.LambdaLR(args.epochs, 0, args.decay_epoch).step)
    self.d_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        self.d_optimizer,
        lr_lambda=utils.LambdaLR(args.epochs, 0, args.decay_epoch).step)

    # Try loading checkpoint
    #####################################################
    if not os.path.isdir(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    try:
        ckpt = utils.load_checkpoint('%s/latest_semisuper_cycleGAN.ckpt' %
                                     (args.checkpoint_dir))
        self.start_epoch = ckpt['epoch']
        self.Di.load_state_dict(ckpt['Di'])
        self.Ds.load_state_dict(ckpt['Ds'])
        self.Gis.load_state_dict(ckpt['Gis'])
        self.Gsi.load_state_dict(ckpt['Gsi'])
        self.d_optimizer.load_state_dict(ckpt['d_optimizer'])
        self.g_optimizer.load_state_dict(ckpt['g_optimizer'])
        self.best_iou = ckpt['best_iou']
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer silently treated as "no checkpoint".
        print(' [*] No checkpoint!')
        self.start_epoch = 0
        self.best_iou = -100
def _train_epoch(self, epoch):
    """Run one distributed training epoch for the 3-class segmenter.

    Losses are all-reduced across workers for logging; metrics/images go to
    tensorboard on rank 0. Returns a dict with the averaged train loss, last
    learning rate, wall time, epoch index and the last computed Mean IoU.

    Fixes over the previous revision: removed per-iteration debug prints
    (optimizer dump and running-loss print) and dropped the
    torch.autograd.detect_anomaly() wrapper around backward() — it is a
    debugging aid that slows every backward pass considerably.
    """
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    # Fresh 3-class confusion-matrix accumulator for this epoch.
    running_metric_melons = runningScore(3)
    lr = self.optimizer.param_groups[0]['lr']
    for i, batch in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        lr = self.optimizer.param_groups[0]['lr']
        # Move every tensor in the batch onto the training device.
        for key, value in batch.items():
            if value is not None:
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
        cur_batch_size = batch['img'].size()[0]
        self.optimizer.zero_grad()
        preds = self.model(batch['img'])
        loss_dict = self.criterion(preds, batch)
        # Models may return (main_output, aux...); metrics use the main head.
        if isinstance(preds, tuple):
            preds = preds[0]
        # Average the loss across distributed workers for logging.
        reduce_loss = self.all_reduce_tensor(loss_dict['loss'])
        # backward
        loss_dict['loss'].backward()
        self.optimizer.step()
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            self.scheduler.step()
        # acc / iou against the full-resolution labels.
        target = batch['label']
        h, w = target.size(1), target.size(2)
        scale_pred = F.interpolate(input=preds, size=(h, w),
                                   mode='bilinear', align_corners=True)
        label_preds = torch.argmax(scale_pred, dim=1)
        running_metric_melons.update(target.data.cpu().numpy(),
                                     label_preds.data.cpu().numpy())
        score_, _ = running_metric_melons.get_scores()
        # Record loss and accuracy to the log.
        # NOTE: loss_dict values are converted in place to Python floats.
        loss_str = 'loss: {:.4f}, '.format(reduce_loss.item())
        for idx, (key, value) in enumerate(loss_dict.items()):
            loss_dict[key] = value.item()
            if key == 'loss':
                continue
            loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
            if idx < len(loss_dict) - 1:
                loss_str += ', '
        train_loss += loss_dict['loss']
        acc = score_['Mean Acc']
        iou_Mean_map = score_['Mean IoU']
        if self.global_step % self.log_iter == 0:
            batch_time = time.time() - batch_start
            self.logger_info(
                '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_Mean_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'
                .format(epoch, self.epochs, i + 1, self.train_loader_len,
                        self.global_step,
                        self.log_iter * cur_batch_size / batch_time, acc,
                        iou_Mean_map, loss_str, lr, batch_time))
            batch_start = time.time()
        if self.tensorboard_enable and self.config['local_rank'] == 0:
            # write tensorboard (rank 0 only)
            for key, value in loss_dict.items():
                self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value,
                                       self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_Mean_map', iou_Mean_map,
                                   self.global_step)
            self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
            if self.global_step % self.show_images_iter == 0:
                # show images on tensorboard
                self.inverse_normalize(batch['img'])
                preds_colors = decode_predictions(preds, cur_batch_size, 3)
                self.writer.add_images('TRAIN/imgs',
                                       batch['img'][0].unsqueeze(0),
                                       self.global_step)
                target = batch['label']
                targets_colors = decode_labels(target, cur_batch_size, 3)
                self.writer.add_image('TRAIN/labels', targets_colors[0],
                                      self.global_step, dataformats='HWC')
                self.writer.add_image('TRAIN/preds', preds_colors[0],
                                      self.global_step, dataformats='HWC')
    return {
        'train_loss': train_loss / self.train_loader_len,
        'lr': lr,
        'time': time.time() - epoch_start,
        'epoch': epoch,
        'MeanIoU': iou_Mean_map
    }
def __init__(self, cfg):
    """Set up the adversarial label-refinement trainer from the config tree.

    Builds two U-Net generators (one refining old label maps, one predicting
    labels from images), a patch discriminator over labels+image, the G/D
    losses, dataloaders, Adam optimizers with polynomial LR decay, logger and
    metrics, and optionally resumes all parts from a numbered checkpoint.
    """
    self.cfg = cfg
    self.OldLabel_generator = U_Net(in_ch=cfg.DATASET.N_CLASS,
                                    out_ch=cfg.DATASET.N_CLASS,
                                    side='out')
    self.Image_generator = U_Net(in_ch=3, out_ch=cfg.DATASET.N_CLASS,
                                 side='in')
    # Discriminator input: label channels concatenated with the RGB image.
    self.discriminator = Discriminator(cfg.DATASET.N_CLASS + 3,
                                       cfg.DATASET.IMGSIZE, patch=True)
    self.criterion_G = GeneratorLoss(cfg.LOSS.LOSS_WEIGHT[0],
                                     cfg.LOSS.LOSS_WEIGHT[1],
                                     cfg.LOSS.LOSS_WEIGHT[2],
                                     ignore_index=cfg.LOSS.IGNORE_INDEX)
    self.criterion_D = DiscriminatorLoss()
    train_dataset = BaseDataset(cfg, split='train')
    valid_dataset = BaseDataset(cfg, split='val')
    self.train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=cfg.DATASET.BATCHSIZE,
        num_workers=8,
        shuffle=True,
        drop_last=True)
    self.valid_dataloader = data.DataLoader(
        valid_dataset,
        batch_size=cfg.DATASET.BATCHSIZE,
        num_workers=8,
        shuffle=True,
        drop_last=True)
    self.ckpt_outdir = os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints')
    if not os.path.isdir(self.ckpt_outdir):
        os.mkdir(self.ckpt_outdir)
    self.val_outdir = os.path.join(cfg.TRAIN.OUTDIR, 'val')
    if not os.path.isdir(self.val_outdir):
        os.mkdir(self.val_outdir)
    # TRAIN.RESUME doubles as the resume epoch index; negative means
    # "train from scratch" (see the check below).
    self.start_epoch = cfg.TRAIN.RESUME
    self.n_epoch = cfg.TRAIN.N_EPOCH
    self.optimizer_G = torch.optim.Adam(
        [{
            'params': self.OldLabel_generator.parameters()
        }, {
            'params': self.Image_generator.parameters()
        }],
        lr=cfg.OPTIMIZER.G_LR,
        betas=(cfg.OPTIMIZER.BETA1, cfg.OPTIMIZER.BETA2),
        weight_decay=cfg.OPTIMIZER.WEIGHT_DECAY)
    self.optimizer_D = torch.optim.Adam(
        [{
            'params': self.discriminator.parameters(),
            'initial_lr': cfg.OPTIMIZER.D_LR
        }],
        lr=cfg.OPTIMIZER.D_LR,
        betas=(cfg.OPTIMIZER.BETA1, cfg.OPTIMIZER.BETA2),
        weight_decay=cfg.OPTIMIZER.WEIGHT_DECAY)
    # Polynomial LR decay (power 0.9) over the whole training schedule.
    iter_per_epoch = len(train_dataset) // cfg.DATASET.BATCHSIZE
    lambda_poly = lambda iters: pow(
        (1.0 - iters / (cfg.TRAIN.N_EPOCH * iter_per_epoch)), 0.9)
    self.scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        self.optimizer_G,
        lr_lambda=lambda_poly, )
    # last_epoch=(self.start_epoch+1)*iter_per_epoch)
    self.scheduler_D = torch.optim.lr_scheduler.LambdaLR(
        self.optimizer_D,
        lr_lambda=lambda_poly, )
    # last_epoch=(self.start_epoch+1)*iter_per_epoch)
    self.logger = logger(cfg.TRAIN.OUTDIR, name='train')
    self.running_metrics = runningScore(n_classes=cfg.DATASET.N_CLASS)
    if self.start_epoch >= 0:
        # NOTE(review): the same checkpoint file is torch.load()ed five
        # times; loading it once into a local would be cheaper.
        self.OldLabel_generator.load_state_dict(
            torch.load(
                os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                             '{}epoch.pth'.format(
                                 self.start_epoch)))['model_G_N'])
        self.Image_generator.load_state_dict(
            torch.load(
                os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                             '{}epoch.pth'.format(
                                 self.start_epoch)))['model_G_I'])
        self.discriminator.load_state_dict(
            torch.load(
                os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                             '{}epoch.pth'.format(
                                 self.start_epoch)))['model_D'])
        self.optimizer_G.load_state_dict(
            torch.load(
                os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                             '{}epoch.pth'.format(
                                 self.start_epoch)))['optimizer_G'])
        self.optimizer_D.load_state_dict(
            torch.load(
                os.path.join(cfg.TRAIN.OUTDIR, 'checkpoints',
                             '{}epoch.pth'.format(
                                 self.start_epoch)))['optimizer_D'])
        log = "Using the {}th checkpoint".format(self.start_epoch)
        self.logger.info(log)
    self.Image_generator = self.Image_generator.cuda()
    self.OldLabel_generator = self.OldLabel_generator.cuda()
    self.discriminator = self.discriminator.cuda()
    self.criterion_G = self.criterion_G.cuda()
    self.criterion_D = self.criterion_D.cuda()
def __init__(self, args):
    """Build the semi-supervised CycleGAN trainer.

    Creates the image<->segmentation generator pair, per-domain pixel
    discriminators, losses, Adam optimizers with linear-decay schedulers,
    and tries to resume from the latest checkpoint.

    Args:
        args: parsed command-line namespace (dataset, ngf/ndf, norm,
              gpu_ids, lr, epochs, decay_epoch, checkpoint_dir, ...).

    Raises:
        ValueError: if args.dataset is not one of the supported datasets.
    """
    # Number of segmentation channels per dataset (classes incl. background).
    if args.dataset == 'voc2012':
        self.n_channels = 21
    elif args.dataset == 'cityscapes':
        self.n_channels = 20
    elif args.dataset == 'acdc':
        self.n_channels = 4
    else:
        # Fail fast instead of crashing later with a confusing
        # AttributeError on self.n_channels.
        raise ValueError('unsupported dataset: {}'.format(args.dataset))

    # Define the network
    #####################################################
    # Gis: segmentation -> image
    self.Gis = define_Gen(input_nc=self.n_channels,
                          output_nc=3,
                          ngf=args.ngf,
                          netG='resnet_9blocks',
                          norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)
    # Gsi: image -> segmentation (softmax head)
    self.Gsi = define_Gen(input_nc=3,
                          output_nc=self.n_channels,
                          ngf=args.ngf,
                          netG='resnet_9blocks_softmax',
                          norm=args.norm,
                          use_dropout=not args.no_dropout,
                          gpu_ids=args.gpu_ids)
    # Pixel discriminators: Di judges images, Ds judges segmentations.
    self.Di = define_Dis(input_nc=3,
                         ndf=args.ndf,
                         netD='pixel',
                         n_layers_D=3,
                         norm=args.norm,
                         gpu_ids=args.gpu_ids)
    self.Ds = define_Dis(
        input_nc=1,
        ndf=args.ndf,
        netD='pixel',
        n_layers_D=3,
        norm=args.norm,
        gpu_ids=args.gpu_ids)  # for voc 2012, there are 21 classes

    utils.print_networks([self.Gis, self.Gsi, self.Di, self.Ds],
                         ['Gis', 'Gsi', 'Di', 'Ds'])

    self.args = args

    # Loss primitives used by the training steps.
    self.MSE = nn.MSELoss()
    self.L1 = nn.L1Loss()
    self.CE = nn.CrossEntropyLoss()
    self.activation_softmax = nn.Softmax2d()

    ### Tensorboard writer
    self.writer_semisuper = SummaryWriter(tensorboard_loc + '_semisuper')
    self.running_metrics_val = utils.runningScore(self.n_channels,
                                                  args.dataset)

    ### For adding gaussian noise
    self.gauss_noise = utils.GaussianNoise(sigma=0.2)

    # Optimizers
    #####################################################
    self.g_optimizer = torch.optim.Adam(itertools.chain(
        self.Gis.parameters(), self.Gsi.parameters()),
                                        lr=args.lr,
                                        betas=(0.5, 0.999))
    self.d_optimizer = torch.optim.Adam(itertools.chain(
        self.Di.parameters(), self.Ds.parameters()),
                                        lr=args.lr,
                                        betas=(0.5, 0.999))

    self.g_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        self.g_optimizer,
        lr_lambda=utils.LambdaLR(args.epochs, 0, args.decay_epoch).step)
    self.d_lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
        self.d_optimizer,
        lr_lambda=utils.LambdaLR(args.epochs, 0, args.decay_epoch).step)

    # Try loading checkpoint
    #####################################################
    if not os.path.isdir(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    try:
        ckpt = utils.load_checkpoint('%s/latest_semisuper_cycleGAN.ckpt' %
                                     (args.checkpoint_dir))
        self.start_epoch = ckpt['epoch']
        self.Di.load_state_dict(ckpt['Di'])
        self.Ds.load_state_dict(ckpt['Ds'])
        self.Gis.load_state_dict(ckpt['Gis'])
        self.Gsi.load_state_dict(ckpt['Gsi'])
        self.d_optimizer.load_state_dict(ckpt['d_optimizer'])
        self.g_optimizer.load_state_dict(ckpt['g_optimizer'])
        self.best_iou = ckpt['best_iou']
    except Exception:
        # Deliberate best-effort resume: any failure (missing file,
        # malformed checkpoint) falls back to a fresh start. A bare
        # `except:` would also have swallowed KeyboardInterrupt/SystemExit.
        print(' [*] No checkpoint!')
        self.start_epoch = 0
        self.best_iou = -100
def _train_epoch(self, epoch):
    """Run one training epoch over self.train_loader.

    Performs forward/backward/step on each batch, accumulates the total
    loss, tracks shrink-map accuracy/IoU with a fresh runningScore, logs
    progress every self.log_iter steps, and (on rank 0) writes scalars and
    image grids to tensorboard.

    Args:
        epoch: 1-based index of the current epoch, used only for logging.

    Returns:
        dict with 'train_loss' (mean over the epoch), final 'lr',
        elapsed 'time' in seconds, and the 'epoch' index.
    """
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    # Fresh binary (text / non-text) metric accumulator for this epoch.
    running_metric_text = runningScore(2)
    lr = self.optimizer.param_groups[0]['lr']
    for i, batch in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        # Re-read lr each step: the scheduler may change it per iteration.
        lr = self.optimizer.param_groups[0]['lr']
        # Convert the data and move it onto the GPU.
        for key, value in batch.items():
            if value is not None:
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
        cur_batch_size = batch['img'].size()[0]
        preds = self.model(batch['img'])
        loss_dict = self.criterion(preds, batch)
        # backward
        self.optimizer.zero_grad()
        loss_dict['loss'].backward()
        self.optimizer.step()
        # WarmupPolyLR is stepped per iteration; other schedulers are
        # presumably stepped per epoch elsewhere — TODO confirm.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            self.scheduler.step()
        # acc iou — channel 0 of preds is treated as the shrink map score.
        score_shrink_map = cal_text_score(
            preds[:, 0, :, :],
            batch['shrink_map'],
            batch['shrink_mask'],
            running_metric_text,
            thred=self.config['post_processing']['args']['thresh'])
        # Record loss and acc into the log string. Note: the loop below
        # converts every tensor in loss_dict to a Python float in place
        # (safe: keys are not added/removed while iterating).
        loss_str = 'loss: {:.4f}, '.format(loss_dict['loss'].item())
        for idx, (key, value) in enumerate(loss_dict.items()):
            loss_dict[key] = value.item()
            if key == 'loss':
                continue
            loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
            if idx < len(loss_dict) - 1:
                loss_str += ', '
        train_loss += loss_dict['loss']
        acc = score_shrink_map['Mean Acc']
        iou_shrink_map = score_shrink_map['Mean IoU']
        if self.global_step % self.log_iter == 0:
            batch_time = time.time() - batch_start
            self.logger_info(
                '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'
                .format(epoch, self.epochs, i + 1, self.train_loader_len,
                        self.global_step,
                        self.log_iter * cur_batch_size / batch_time, acc,
                        iou_shrink_map, loss_str, lr, batch_time))
            batch_start = time.time()
        # Only rank 0 writes to tensorboard in distributed training.
        if self.tensorboard_enable and self.config['local_rank'] == 0:
            # write tensorboard
            for key, value in loss_dict.items():
                self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value,
                                       self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc,
                                   self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map',
                                   iou_shrink_map, self.global_step)
            self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
            if self.global_step % self.show_images_iter == 0:
                # show images on tensorboard
                self.inverse_normalize(batch['img'])
                self.writer.add_images('TRAIN/imgs', batch['img'],
                                       self.global_step)
                # shrink_labels and threshold_labels
                shrink_labels = batch['shrink_map']
                threshold_labels = batch['threshold_map']
                # Binarize the shrink labels in place for visualization
                # (safe here: the loss for this batch is already computed).
                shrink_labels[shrink_labels <= 0.5] = 0
                shrink_labels[shrink_labels > 0.5] = 1
                show_label = torch.cat([shrink_labels, threshold_labels])
                show_label = vutils.make_grid(show_label.unsqueeze(1),
                                              nrow=cur_batch_size,
                                              normalize=False,
                                              padding=20,
                                              pad_value=1)
                self.writer.add_image('TRAIN/gt', show_label,
                                      self.global_step)
                # model output: tile every prediction channel into one grid.
                show_pred = []
                for kk in range(preds.shape[1]):
                    show_pred.append(preds[:, kk, :, :])
                show_pred = torch.cat(show_pred)
                show_pred = vutils.make_grid(show_pred.unsqueeze(1),
                                             nrow=cur_batch_size,
                                             normalize=False,
                                             padding=20,
                                             pad_value=1)
                self.writer.add_image('TRAIN/preds', show_pred,
                                      self.global_step)
    return {
        'train_loss': train_loss / self.train_loader_len,
        'lr': lr,
        'time': time.time() - epoch_start,
        'epoch': epoch
    }
def __init__(self, cfg):
    """Set up the ICNet training pipeline from a config dictionary.

    Builds the SUN RGB-D train/val loaders, the ICNet model and its loss,
    an SGD optimizer with per-group learning rates (backbone at base LR,
    task heads at 10x), a step LR scheduler, evaluation metrics, and
    optionally resumes model/optimizer/scheduler state from a checkpoint.

    Args:
        cfg: nested dict with "train" and "optimizer" sections.
    """
    self.cfg = cfg
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    self.dataparallel = torch.cuda.device_count() > 1

    # --- datasets and loaders --------------------------------------
    img_size = (cfg['train']['img_rows'], cfg['train']['img_cols'])
    dataset_train = SUNRGBDLoader(root=cfg["train"]["data_path"],
                                  split="training",
                                  is_transform=True,
                                  img_size=img_size,
                                  img_norm=True)
    dataset_val = SUNRGBDLoader(root=cfg["train"]["data_path"],
                                split="val",
                                is_transform=True,
                                img_size=img_size,
                                img_norm=True)
    self.train_dataloader = data.DataLoader(
        dataset=dataset_train,
        batch_size=cfg["train"]["train_batch_size"],
        shuffle=True,
        num_workers=0,
        pin_memory=True,
        drop_last=False)
    self.val_dataloader = data.DataLoader(
        dataset=dataset_val,
        batch_size=cfg["train"]["valid_batch_size"],
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False)
    self.iters_per_epoch = len(self.train_dataloader)
    self.max_iters = cfg["train"]["epochs"] * self.iters_per_epoch

    # --- network and loss ------------------------------------------
    self.model = ICNet(nclass=dataset_train.n_classes,
                       backbone='resnet50').to(self.device)
    self.criterion = ICNetLoss(ignore_index=-1).to(self.device)

    # --- optimizer: pretrained backbone at base LR, heads at 10x ---
    base_lr = cfg["optimizer"]["init_lr"]
    param_groups = []
    if hasattr(self.model, 'pretrained'):
        param_groups.append({
            'params': self.model.pretrained.parameters(),
            'lr': base_lr
        })
    if hasattr(self.model, 'exclusive'):
        param_groups.extend(
            {
                'params': getattr(self.model, name).parameters(),
                'lr': base_lr * 10
            } for name in self.model.exclusive)
    self.optimizer = torch.optim.SGD(
        params=param_groups,
        lr=base_lr,
        momentum=cfg["optimizer"]["momentum"],
        weight_decay=cfg["optimizer"]["weight_decay"])

    # Step-style polynomial LR schedule with a fixed milestone.
    self.lr_scheduler = PloyStepLR(self.optimizer, milestone=3500)

    # Wrap with DataParallel only when more than one GPU is present.
    if self.dataparallel:
        self.model = nn.DataParallel(self.model)

    # --- bookkeeping ------------------------------------------------
    self.metric = runningScore(dataset_train.n_classes)
    self.current_mIoU = 0.0
    self.best_mIoU = 0.0
    self.epochs = cfg["train"]["epochs"]
    self.current_epoch = 0
    self.current_iteration = 0

    # --- optional resume from checkpoint ---------------------------
    resume_path = cfg["train"]["resume"]
    if resume_path is not None:
        if os.path.isfile(resume_path):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    resume_path))
            checkpoint = torch.load(resume_path)
            self.model.load_state_dict(checkpoint["model_state"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state"])
            self.lr_scheduler.load_state_dict(checkpoint["scheduler_state"])
            self.current_epoch = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                resume_path, checkpoint["epoch"]))
        else:
            logger.info(
                "No checkpoint found at '{}'".format(resume_path))
def main():
    """Evaluate a trained segmentation model on the validation split.

    Parses the model/dataset configuration from the command line, restores
    the checkpoint, accumulates confusion-matrix metrics over the whole
    validation set, prints overall and per-class scores, and saves a tiled
    visualization of the first nine predictions to viz_evaluate.png.
    """

    def _pair(s):
        # argparse type=tuple splits a string into characters
        # (tuple('256') -> ('2','5','6')); parse "H,W" explicitly instead.
        if s is None:
            return None
        return tuple(int(v) for v in s.replace('x', ',').split(','))

    def _bool(s):
        # argparse type=bool is broken: bool('False') is True.
        return str(s).lower() in ('1', 'true', 'yes', 'y')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, default='deeplab-largefov')
    parser.add_argument(
        '--model_file',
        type=str,
        default=
        '/home/ecust/lx/Semantic-Segmentation-PyTorch/logs/deeplab-largefov_20190417_230357/model_best.pth.tar',
        help='Model path')
    parser.add_argument('--dataset_type',
                        type=str,
                        default='voc',
                        help='type of dataset')
    parser.add_argument(
        '--dataset',
        type=str,
        default='/home/ecust/Datasets/PASCAL VOC/VOCdevkit/VOC2012',
        help='path to dataset')
    parser.add_argument('--img_size',
                        type=_pair,
                        default=None,
                        help='resize images using bilinear interpolation, '
                             'given as "H,W"')
    parser.add_argument('--crop_size',
                        type=_pair,
                        default=None,
                        help='crop images, given as "H,W"')
    parser.add_argument('--n_classes',
                        type=int,
                        default=21,
                        help='number of classes')
    parser.add_argument('--pretrained',
                        type=_bool,
                        default=True,
                        help='should be set the same as train.py')
    args = parser.parse_args()

    model_file = args.model_file
    root = args.dataset
    n_classes = args.n_classes

    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset_type)
    val_loader = DataLoader(loader(root,
                                   n_classes=n_classes,
                                   split='val',
                                   img_size=args.img_size,
                                   augmentations=crop,
                                   pretrained=args.pretrained),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    model, _, _ = Models.model_loader(args.model, n_classes, resume=None)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))
    model_data = torch.load(model_file)
    # Checkpoints may store either a bare state_dict or a wrapping dict.
    try:
        model.load_state_dict(model_data)
    except Exception:
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset_type))
    visualizations = []
    metrics = runningScore(n_classes)

    # Inference only: without no_grad() the original retained an autograd
    # graph for every batch, growing memory across the whole val set.
    with torch.no_grad():
        for data, target in tqdm.tqdm(val_loader,
                                      total=len(val_loader),
                                      ncols=80,
                                      leave=False):
            data, target = data.to(device), target.to(device)
            score = model(data)

            imgs = data.data.cpu()
            # Class prediction = argmax over the channel dimension.
            lbl_pred = score.data.max(1)[1].cpu().numpy()
            lbl_true = target.data.cpu()
            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                img, lt = val_loader.dataset.untransform(img, lt)
                metrics.update(lt, lp)
                # Keep at most 9 tiles for the saved visualization.
                if len(visualizations) < 9:
                    viz = visualize_segmentation(lbl_pred=lp,
                                                 lbl_true=lt,
                                                 img=img,
                                                 n_classes=n_classes,
                                                 dataloader=val_loader)
                    visualizations.append(viz)

    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100, acc_cls * 100, mean_iu * 100,
                                  fwavacc * 100) + '\n')

    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # scipy.misc.imsave was removed in SciPy >= 1.2; prefer Pillow (which
    # scipy.misc itself relied on) and keep the old call only as a fallback.
    try:
        from PIL import Image
        Image.fromarray(viz).save('viz_evaluate.png')
    except ImportError:
        scipy.misc.imsave('viz_evaluate.png', viz)
def _train_epoch(self, epoch):
    """Run one training epoch over self.train_loader (tuple-style batches).

    Each batch is an (images, shrink_labels, threshold_labels) triple.
    Performs forward/backward/step, accumulates the total loss, tracks
    shrink-map accuracy/IoU, logs every self.display_interval batches, and
    optionally writes scalars and image grids to tensorboard.

    Args:
        epoch: 1-based index of the current epoch, used only for logging.

    Returns:
        dict with 'train_loss' (mean over the epoch), final 'lr',
        elapsed 'time' in seconds, and the 'epoch' index.
    """
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    # Fresh binary (text / non-text) metric accumulator for this epoch.
    running_metric_text = runningScore(2)
    lr = self.optimizer.param_groups[0]['lr']
    for i, (images, shrink_labels,
            threshold_labels) in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        # Re-read lr each step: the scheduler may change it per iteration.
        lr = self.optimizer.param_groups[0]['lr']
        # Convert the data and move it onto the GPU.
        cur_batch_size = images.size()[0]
        images, shrink_labels, threshold_labels = images.to(
            self.device), shrink_labels.to(
                self.device), threshold_labels.to(self.device)
        preds = self.model(images)
        loss_all, loss_shrink_map, loss_binary_map, loss_threshold_map = self.criterion(
            preds, shrink_labels, threshold_labels)
        # backward
        self.optimizer.zero_grad()
        loss_all.backward()
        self.optimizer.step()
        # WarmupPolyLR is stepped per iteration; other schedulers are
        # presumably stepped per epoch elsewhere — TODO confirm.
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            self.scheduler.step()
        # acc iou — channel 0 of preds is treated as the shrink map score.
        score_shrink_map = cal_text_score(preds[:, 0, :, :], shrink_labels,
                                          running_metric_text, thred=0.5)
        # Record loss and acc into the log: convert the loss tensors to
        # Python floats (also frees their graphs for this iteration).
        loss_all = loss_all.item()
        loss_shrink_map = loss_shrink_map.item()
        loss_binary_map = loss_binary_map.item()
        loss_threshold_map = loss_threshold_map.item()
        train_loss += loss_all
        acc = score_shrink_map['Mean Acc']
        iou_shrink_map = score_shrink_map['Mean IoU']
        if (i + 1) % self.display_interval == 0:
            batch_time = time.time() - batch_start
            self.logger.info(
                '[{}/{}], [{}/{}], global_step: {}, Speed: {:.1f} samples/sec, acc: {:.4f}, iou_shrink_map: {:.4f}, loss_all: {:.4f}, loss_shrink_map: {:.4f}, loss_binary_map: {:.4f}, loss_threshold_map: {:.4f}, lr:{:.6}, time:{:.2f}'
                .format(
                    epoch, self.epochs, i + 1, self.train_loader_len,
                    self.global_step,
                    self.display_interval * cur_batch_size / batch_time,
                    acc, iou_shrink_map, loss_all, loss_shrink_map,
                    loss_binary_map, loss_threshold_map, lr, batch_time))
            batch_start = time.time()
        if self.tensorboard_enable:
            # write tensorboard
            self.writer.add_scalar('TRAIN/LOSS/loss_all', loss_all,
                                   self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_shrink_map',
                                   loss_shrink_map, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_binary_map',
                                   loss_binary_map, self.global_step)
            self.writer.add_scalar('TRAIN/LOSS/loss_threshold_map',
                                   loss_threshold_map, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc,
                                   self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_shrink_map',
                                   iou_shrink_map, self.global_step)
            self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
            if i % self.show_images_interval == 0:
                # show images on tensorboard
                self.writer.add_images('TRAIN/imgs', images,
                                       self.global_step)
                # shrink_labels and threshold_labels
                # Binarize the shrink labels in place for visualization
                # (safe here: the loss for this batch is already computed).
                shrink_labels[shrink_labels <= 0.5] = 0
                shrink_labels[shrink_labels > 0.5] = 1
                show_label = torch.cat([shrink_labels, threshold_labels])
                show_label = vutils.make_grid(show_label.unsqueeze(1),
                                              nrow=cur_batch_size,
                                              normalize=False,
                                              padding=20,
                                              pad_value=1)
                self.writer.add_image('TRAIN/gt', show_label,
                                      self.global_step)
                # model output: tile the three prediction channels
                # (shrink / binary / threshold maps) into one grid.
                show_pred = torch.cat([
                    preds[:, 0, :, :], preds[:, 1, :, :], preds[:, 2, :, :]
                ])
                show_pred = vutils.make_grid(show_pred.unsqueeze(1),
                                             nrow=cur_batch_size,
                                             normalize=False,
                                             padding=20,
                                             pad_value=1)
                self.writer.add_image('TRAIN/preds', show_pred,
                                      self.global_step)
    return {
        'train_loss': train_loss / self.train_loader_len,
        'lr': lr,
        'time': time.time() - epoch_start,
        'epoch': epoch
    }