def worker(args, dev_id, start_idx, end_idx, result_queue):
    """Per-GPU evaluation worker.

    Pins CUDA device ``dev_id``, builds the encoder/decoder pair and a
    validation loader over records ``[start_idx, end_idx)``, then runs
    evaluation, reporting results through ``result_queue``.
    """
    torch.cuda.set_device(dev_id)

    # Build the model first (use_softmax=True selects the inference head).
    model_builder = ModelBuilder()
    encoder = model_builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    decoder = model_builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder,
        use_softmax=True)
    seg_module = SegmentationModule(encoder, decoder)
    seg_module.cuda()

    # Validation data restricted to this worker's shard of the record list.
    val_set = ValDataset(
        broden_dataset.record_list['validation'], args,
        max_sample=args.num_val, start_idx=start_idx, end_idx=end_idx)
    val_loader = torchdata.DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=2)

    # Main loop
    evaluate(seg_module, val_loader, args, dev_id, result_queue)
def main(args):
    """Run segmentation inference on the single image ``args.test_img``.

    Builds the encoder/decoder on GPU ``args.gpu_id`` and feeds the image
    through ``test``; writes nothing back to ``args``.
    """
    torch.cuda.set_device(args.gpu_id)

    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder,
        use_softmax=True)  # inference head
    segmentation_module = SegmentationModule(net_encoder, net_decoder)
    segmentation_module.cuda()

    # Dataset and Loader: exactly one test image.
    list_test = [{'fpath_img': args.test_img}]
    dataset_val = TestDataset(list_test, args, max_sample=args.num_val)
    # BUGFIX: the original passed drop_last=True, which discards the final
    # partial batch — with a one-image dataset and batch_size > 1 that
    # would drop the only sample and evaluate nothing. Inference must see
    # every sample, so keep all batches.
    loader_val = torchdata.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=5,
        drop_last=False)

    # Main loop
    test(segmentation_module, loader_val, args)

    print('Inference done!')
def main(args):
    """Train the segmentation model on the multi-source data stream.

    Builds encoder/decoder, wraps them (with deep supervision when the
    decoder arch asks for it), and runs ``train`` + ``checkpoint`` for
    epochs ``args.start_epoch .. args.num_epoch``.
    """
    # Network Builders
    net_builder = ModelBuilder()
    encoder = net_builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    decoder = net_builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        nr_classes=args.nr_classes,
        weights=args.weights_decoder)

    # TODO(LYC):: move criterion outside model.
    # crit = nn.NLLLoss(ignore_index=-1)

    # Deep-supervision decoders take an extra loss-scale argument.
    if args.arch_decoder.endswith('deepsup'):
        seg_module = SegmentationModule(encoder, decoder, args.deep_sup_scale)
    else:
        seg_module = SegmentationModule(encoder, decoder)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # create loader iterator
    train_iter = create_multi_source_train_data_loader(args=args)

    # load nets into gpu — multi-GPU path uses the scatter-aware
    # DataParallel plus the sync-BN replication callback.
    if args.num_gpus > 1:
        seg_module = UserScatteredDataParallel(
            seg_module, device_ids=range(args.num_gpus))
        # For sync bn
        patch_replication_callback(seg_module)
    seg_module.cuda()

    # Set up optimizers
    nets = (encoder, decoder)
    optimizers = create_optimizers(nets, args)

    # Main loop: one train pass + one checkpoint per epoch.
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(seg_module, train_iter, optimizers, history, epoch, args)
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
def main(args):
    # Audio-visual training/evaluation driver: builds sound, frame and
    # audio-visual-localization (avol) networks, resumes from checkpoints,
    # then trains or evaluates depending on args.mode.

    # Seed every RNG for reproducibility.
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    np.random.seed(0)
    random.seed(0)

    # Network Builders
    builder = ModelBuilder()
    net_sound = builder.build_sound(
        arch=args.arch_sound,
        input_channel=1,
        output_channel=args.num_channels,
        fc_dim=args.num_channels,
        weights=args.weights_sound)
    net_frame = builder.build_frame(
        arch=args.arch_frame,
        fc_dim=args.num_channels,
        pool_type=args.img_pool,
        weights=args.weights_frame)
    # NOTE(review): build_avol receives args.weights_frame rather than a
    # dedicated avol weights path — looks like a copy-paste; confirm intended.
    net_avol = builder.build_avol(
        arch=args.arch_avol,
        fc_dim=args.num_channels,
        weights=args.weights_frame)
    crit_loc = nn.BCELoss()                             # localization loss
    crit_sep = builder.build_criterion(arch=args.loss)  # separation loss

    # Dataset and Loader
    dataset_train = MUSICMixDataset(args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(
        args.list_val, args, max_sample=args.num_val, split='val')
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=int(args.workers),
        drop_last=True)
    loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=int(args.workers),
        drop_last=False)
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Set up optimizer
    optimizer = create_optimizer(net_sound, net_frame, net_avol, args)

    # History of performance metrics, per split.
    history = {
        'train': {'epoch': [], 'err': [], 'err_loc': [], 'err_sep': [], 'acc': []},
        'val': {'epoch': [], 'err': [], 'err_loc': [], 'err_sep': [], 'acc': [],
                'sdr': [], 'sir': [], 'sar': []}}

    # Load from pretrained models if a checkpoint already exists.
    start_epoch = 1
    model_name = args.ckpt + '/checkpoint.pth'
    if os.path.exists(model_name):
        if args.mode == 'eval':
            # Eval mode: restore weights only.
            net_sound, net_frame, net_avol = load_checkpoint_from_train(
                net_sound, net_frame, net_avol, model_name)
        elif args.mode == 'train':
            # Train mode: resume the *latest* checkpoint, including
            # optimizer state, epoch counter and history.
            model_name = args.ckpt + '/checkpoint_latest.pth'
            net_sound, net_frame, net_avol, optimizer, start_epoch, history = load_checkpoint(
                net_sound, net_frame, net_avol, optimizer, history, model_name)
            print("Loading from previous checkpoint.")
    else:
        # Fresh training run: optionally warm-start sound+frame nets from a
        # pretrained appearance + sound model.
        if args.mode == 'train' and start_epoch == 1 and os.path.exists(args.weights_model):
            net_sound, net_frame = load_sep(net_sound, net_frame, args.weights_model)
            print("Loading from appearance + sound checkpoint.")

    # Wrap networks for multi-GPU.
    # NOTE(review): .cuda() immediately followed by .to(args.device) is
    # redundant unless args.device differs from the default CUDA device.
    netWrapper1 = NetWrapper1(net_sound)
    netWrapper1 = torch.nn.DataParallel(
        netWrapper1, device_ids=range(args.num_gpus)).cuda()
    netWrapper1.to(args.device)
    netWrapper2 = NetWrapper2(net_frame)
    netWrapper2 = torch.nn.DataParallel(
        netWrapper2, device_ids=range(args.num_gpus)).cuda()
    netWrapper2.to(args.device)
    netWrapper3 = NetWrapper3(net_avol)
    netWrapper3 = torch.nn.DataParallel(
        netWrapper3, device_ids=range(args.num_gpus)).cuda()
    netWrapper3.to(args.device)

    # Eval mode: run a single evaluation pass and exit.
    #evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3, loader_val, history, 0, args)
    if args.mode == 'eval':
        evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3,
                 loader_val, history, 0, args)
        print('Evaluation Done!')
        return

    # Training loop
    for epoch in range(start_epoch, args.num_epoch + 1):
        train(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3,
              loader_train, optimizer, history, epoch, args)

        # drop learning rate at scheduled epochs
        if epoch in args.lr_steps:
            adjust_learning_rate(optimizer, args)

        ## Evaluation and visualization
        if epoch % args.eval_epoch == 0:
            evaluate(crit_loc, crit_sep, netWrapper1, netWrapper2, netWrapper3,
                     loader_val, history, epoch, args)

        # checkpointing
        checkpoint(net_sound, net_frame, net_avol, optimizer, history, epoch, args)

    print('Training Done!')
# NOTE(review): fragment — `model` and `gpu` are defined outside the visible
# source (presumably an enclosing harness); indentation reconstructed.
cfg.merge_from_file(model['config'])
# Point the config at the local ADE20K data layout.
cfg.DATASET['root_dataset'] = './.data/vision/ade20k'
cfg.DATASET['list_train'] = "./.data/vision/ade20k/training.odgt"
cfg.DATASET['list_val'] = "./.data/vision/ade20k/validation.odgt"
BATCH_SIZE = cfg.VAL.batch_size
# NOTE(review): the configured batch size above is immediately overwritten —
# the effective value is always 32; confirm which is intended.
BATCH_SIZE = 32
if not os.path.isdir(os.path.join(cfg.DIR, "result")):
    os.makedirs(os.path.join(cfg.DIR, "result"))
torch.cuda.set_device(gpu)

# Network Builders
net_encoder = ModelBuilder.build_encoder(
    arch=cfg.MODEL.arch_encoder.lower(),
    fc_dim=cfg.MODEL.fc_dim,
    weights=cfg.MODEL.weights_encoder)
net_decoder = ModelBuilder.build_decoder(
    arch=cfg.MODEL.arch_decoder.lower(),
    fc_dim=cfg.MODEL.fc_dim,
    num_class=cfg.DATASET.num_class,
    weights=cfg.MODEL.weights_decoder,
    use_softmax=True)  # inference head
crit = nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

# Dataset and Loader
dataset_val = ValDataset(cfg.DATASET.root_dataset, cfg.DATASET.list_val, cfg.DATASET)
def main(args):
    """Train the multi-branch network selected by ``args.id``.

    Builds the model, optionally resumes from ``args.resume``, then trains
    for ``args.num_epochs`` epochs, checkpointing periodically after
    ``args.particular_epoch``.
    """
    # import network architecture
    builder = ModelBuilder()
    model = builder.build_net(
        arch=args.id,
        num_input=args.num_input,
        num_classes=args.num_classes,
        num_branches=args.num_branches,
        padding_list=args.padding_list,
        dilation_list=args.dilation_list)

    # NOTE(review): device ids are hard-coded here; the general
    # range(args.num_gpus) version was commented out in the original.
    device_ids = [0, 2]
    # model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpus))).cuda()
    model = torch.nn.DataParallel(model, device_ids=device_ids).cuda()
    # model = model.cuda()
    cudnn.benchmark = True

    # collect the number of parameters in the network
    print("------------------------------------------")
    print("Network Architecture of Model %s:" % (args.id))
    num_para = 0
    for name, param in model.named_parameters():
        num_mul = 1
        for x in param.size():
            num_mul *= x
        num_para += num_mul
    print(model)
    print("Number of trainable parameters %d in Model %s" % (num_para, args.id))
    print("------------------------------------------")

    # set the optimizer and loss
    optimizer = optim.RMSprop(model.parameters(), args.lr, alpha=args.alpha,
                              eps=args.eps, weight_decay=args.weight_decay,
                              momentum=args.momentum)
    criterion = nn.CrossEntropyLoss()

    # BUGFIX: the epoch restored from the checkpoint was stored in a local
    # variable but never used — the training loop always started from
    # args.start_epoch, silently re-running already-trained epochs on
    # resume. Honor the resumed epoch instead.
    start_epoch = args.start_epoch
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['opt_dict'])
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> No checkpoint found at '{}'".format(args.resume))

    # loading data
    tf = TrainDataset(train_dir, args)
    train_loader = DataLoader(tf,
                              batch_size=args.batch_size,
                              shuffle=args.shuffle,
                              num_workers=args.num_workers,
                              pin_memory=True)

    print("Start training ...")
    for epoch in tqdm(range(start_epoch + 1, args.num_epochs + 1)):
        train(train_loader, model, criterion, optimizer, epoch, args)

        # save models periodically once past the warm-up threshold
        if epoch > args.particular_epoch and epoch % args.save_epochs_steps == 0:
            save_checkpoint(
                {'epoch': epoch,
                 'state_dict': model.state_dict(),
                 'opt_dict': optimizer.state_dict()},
                epoch, args)

    print("Training Done")
def main(cfg, gpus):
    # Few-shot segmentation evaluation: builds an objectness encoder and a
    # binary (fg/bg) decoder, then measures mIoU over cfg.VAL.n_runs
    # randomized runs on the held-out fold's classes.
    torch.cuda.set_device(gpus[0])

    # Network Builders
    net_objectness = ModelBuilder.build_objectness(
        arch=cfg.MODEL.arch_objectness,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=2,  # binary segmentation: foreground vs background
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout,
        use_softmax=True)
    crit = nn.NLLLoss(ignore_index=255)  # NOTE(review): built but unused below

    net_objectness.cuda()
    net_objectness.eval()
    net_decoder.cuda()
    net_decoder.eval()

    print('###### Prepare data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        # With/without class conditioning uses different dataloader modules.
        if cfg.VAL.test_with_classes:
            from dataloaders.customized import voc_fewshot
        else:
            from dataloaders.customized_objectness import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        if cfg.VAL.test_with_classes:
            from dataloaders.customized import coco_fewshot
        else:
            from dataloaders.customized_objectness import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
        split = cfg.DATASET.data_split + '2014'
        annFile = f'{cfg.DATASET.data_dir}/annotations/instances_{split}.json'
        cocoapi = COCO(annFile)
    else:
        raise ValueError('Wrong config for dataset!')

    # Evaluate on the classes excluded from the training fold.
    labels = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    #labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    #transforms = [Resize_test(size=cfg.DATASET.input_size)]
    val_transforms = [
        transforms.ToNumpy(),
        transforms.Resize_pad(size=cfg.DATASET.input_size[0])
    ]
    # Mean/std scaled to 0-255 pixel range.
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    '''val_transforms = [
        transforms.ToNumpy(),
        #transforms.RandScale([0.9, 1.1]),
        #transforms.RandRotate([-10, 10], padding=mean, ignore_label=0),
        #transforms.RandomGaussianBlur(),
        #transforms.RandomHorizontalFlip(),
        transforms.Crop([cfg.DATASET.input_size[0], cfg.DATASET.input_size[1]], crop_type='rand', padding=mean, ignore_label=0)]'''
    val_transforms = Compose(val_transforms)

    print('###### Testing begins ######')
    metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
    with torch.no_grad():
        for run in range(cfg.VAL.n_runs):
            print(f'### Run {run + 1} ###')
            set_seed(cfg.VAL.seed + run)  # different seed per run

            print(f'### Load data ###')
            dataset = make_data(
                base_dir=cfg.DATASET.data_dir,
                split=cfg.DATASET.data_split,
                transforms=val_transforms,
                to_tensor=transforms.ToTensorNormalize_noresize(),
                labels=labels,
                max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                n_ways=cfg.TASK.n_ways,
                n_shots=cfg.TASK.n_shots,
                n_queries=cfg.TASK.n_queries,
                permute=cfg.VAL.permute_labels,
            )
            if data_name == 'COCO':
                coco_cls_ids = dataset.datasets[0].dataset.coco.getCatIds()
            testloader = DataLoader(dataset,
                                    batch_size=cfg.VAL.n_batch,
                                    shuffle=False,
                                    num_workers=1,
                                    pin_memory=True,
                                    drop_last=False)
            print(f"Total # of Data: {len(dataset)}")

            count = 0
            for sample_batched in tqdm.tqdm(testloader):
                feed_dict = data_preprocess(sample_batched, cfg)
                if data_name == 'COCO':
                    # Map raw COCO category ids to contiguous 1-based labels.
                    label_ids = [
                        coco_cls_ids.index(x) + 1
                        for x in sample_batched['class_ids']
                    ]
                else:
                    label_ids = list(sample_batched['class_ids'])

                feat = net_objectness(feed_dict['img_data'], return_feature_maps=True)
                query_pred = net_decoder(feat, segSize=(473, 473))
                metric.record(np.array(query_pred.argmax(dim=1)[0].cpu()),
                              np.array(feed_dict['seg_label'][0].cpu()),
                              labels=label_ids,
                              n_run=run)

                if cfg.VAL.visualize:
                    #print(as_numpy(feed_dict['seg_label'][0].cpu()).shape)
                    #print(as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())).shape)
                    #print(feed_dict['img_data'].cpu().shape)
                    query_name = sample_batched['query_ids'][0][0]
                    support_name = sample_batched['support_ids'][0][0][0]
                    if data_name == 'VOC':
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, 'JPEGImages',
                                         query_name + '.jpg'))
                    else:
                        query_name = int(query_name)
                        img_meta = cocoapi.loadImgs(query_name)[0]
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, split,
                                         img_meta['file_name']))
                    #img = imresize(img, cfg.DATASET.input_size)
                    visualize_result(
                        (img, as_numpy(feed_dict['seg_label'][0].cpu()),
                         '%05d' % (count)),
                        as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())),
                        os.path.join(cfg.DIR, 'result'))
                count += 1

            # Per-run metrics.
            classIoU, meanIoU = metric.get_mIoU(labels=sorted(labels), n_run=run)
            classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(n_run=run)

    '''_run.log_scalar('classIoU', classIoU.tolist())
    _run.log_scalar('meanIoU', meanIoU.tolist())
    _run.log_scalar('classIoU_binary', classIoU_binary.tolist())
    _run.log_scalar('meanIoU_binary', meanIoU_binary.tolist())
    _log.info(f'classIoU: {classIoU}')
    _log.info(f'meanIoU: {meanIoU}')
    _log.info(f'classIoU_binary: {classIoU_binary}')
    _log.info(f'meanIoU_binary: {meanIoU_binary}')'''

    # Aggregate mean/std across all runs.
    classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
        labels=sorted(labels))
    classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary()

    print('----- Final Result -----')
    print('final_classIoU', classIoU.tolist())
    print('final_classIoU_std', classIoU_std.tolist())
    print('final_meanIoU', meanIoU.tolist())
    print('final_meanIoU_std', meanIoU_std.tolist())
    print('final_classIoU_binary', classIoU_binary.tolist())
    print('final_classIoU_std_binary', classIoU_std_binary.tolist())
    print('final_meanIoU_binary', meanIoU_binary.tolist())
    print('final_meanIoU_std_binary', meanIoU_std_binary.tolist())
    print(f'classIoU mean: {classIoU}')
    print(f'classIoU std: {classIoU_std}')
    print(f'meanIoU mean: {meanIoU}')
    print(f'meanIoU std: {meanIoU_std}')
    print(f'classIoU_binary mean: {classIoU_binary}')
    print(f'classIoU_binary std: {classIoU_std_binary}')
    print(f'meanIoU_binary mean: {meanIoU_binary}')
    print(f'meanIoU_binary std: {meanIoU_std_binary}')
def main(args):
    # Train a UNet segmentation model on 2D slices extracted from 3D
    # volumes (SideWalkData), tracking per-class and mean best mIoU.

    # Network Builders
    builder = ModelBuilder()
    unet = builder.build_unet(
        num_class=args.num_class,
        arch=args.unet_arch,
        weights=args.weights_unet)
    print("Froze the following layers: ")
    for name, p in unet.named_parameters():
        if p.requires_grad == False:
            print(name)
    print()
    crit = DualLoss(mode="train")
    segmentation_module = SegmentationModule(crit, unet)

    # Augmentations: training adds flips/rotation; both crop to 256.
    train_augs = Compose([
        PaddingCenterCrop(256),
        RandomHorizontallyFlip(),
        RandomVerticallyFlip(),
        RandomRotate(180)
    ])
    test_augs = Compose([PaddingCenterCrop(256)])

    # Dataset and Loader
    # dataset_train = AC17( #Loads 3D volumes
    #     root=args.data_root,
    #     split='train',
    #     k_split=args.k_split,
    #     augmentations=train_augs)
    dataset_train = SideWalkData(  # Loads 3D volumes
        root=args.data_root,
        split='train',
        k_split=args.k_split,
        augmentations=train_augs)
    ac17_train = load2D(
        dataset_train, split='train',
        deform=True)  # Dataloader for 2D slices. Requires 3D loader.
    loader_train = data.DataLoader(
        ac17_train,
        batch_size=args.batch_size_per_gpu,
        shuffle=True,
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=True)
    dataset_val = SideWalkData(
        root=args.data_root,
        split='val',
        k_split=args.k_split,
        augmentations=test_augs)
    ac17_val = load2D(dataset_val, split='val', deform=False)
    loader_val = data.DataLoader(
        ac17_val,
        batch_size=1,
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=5,
        drop_last=True)

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(
            segmentation_module, device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    # NOTE(review): when args.unet is False this references net_encoder /
    # net_decoder, which are not defined in this function — would raise
    # NameError unless they exist at module scope. Confirm.
    nets = (net_encoder, net_decoder, crit) if args.unet == False else (unet, crit)
    optimizers = create_optimizers(nets, args)

    # Main loop — best_val tracks the best epoch/mIoU per class (1..3)
    # and for the 3-class mean.
    history = {'train': {'epoch': [], 'loss': [], 'acc': [], 'jaccard': []}}
    best_val = {
        'epoch_1': 0, 'mIoU_1': 0,
        'epoch_2': 0, 'mIoU_2': 0,
        'epoch_3': 0, 'mIoU_3': 0,
        'epoch': 0, 'mIoU': 0
    }
    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, loader_train, optimizers, history, epoch, args)
        iou, loss = eval(loader_val, segmentation_module, args, crit)

        # checkpointing — flag a save when the loss is low enough or any
        # best score improves.
        ckpted = False
        if loss < 0.215:
            ckpted = True
        if iou[0] > best_val['mIoU_1']:
            best_val['epoch_1'] = epoch
            best_val['mIoU_1'] = iou[0]
            ckpted = True
        if iou[1] > best_val['mIoU_2']:
            best_val['epoch_2'] = epoch
            best_val['mIoU_2'] = iou[1]
            ckpted = True
        if iou[2] > best_val['mIoU_3']:
            best_val['epoch_3'] = epoch
            best_val['mIoU_3'] = iou[2]
            ckpted = True
        if (iou[0] + iou[1] + iou[2]) / 3 > best_val['mIoU']:
            best_val['epoch'] = epoch
            best_val['mIoU'] = (iou[0] + iou[1] + iou[2]) / 3
            ckpted = True
        # Unconditional saves: every 50 epochs and at the final epoch.
        if epoch % 50 == 0:
            checkpoint(nets, history, args, epoch)
            continue
        if epoch == args.num_epoch:
            checkpoint(nets, history, args, epoch)
            continue
        # Suppress best-score saves during the warm-up period.
        if epoch < 15:
            ckpted = False
        if ckpted == False:
            continue
        else:
            checkpoint(nets, history, args, epoch)
            continue
    print()
    print('Training Done!')
def main(args):
    # Frame-by-frame video segmentation: each video frame is written to a
    # temp file, segmented via test(), and the outputs are re-assembled
    # into an output video.
    torch.cuda.set_device(args.gpu)

    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        num_class=args.num_class,
        weights=args.weights_decoder,
        use_softmax=True)
    crit = nn.NLLLoss(ignore_index=-1)
    segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    # list_test = [{'fpath_img': args.test_img}]
    # test_chk = []
    # testing = os.listdir("/home/teai/externalhd2/BDD100K/segmentation_v2/test/")
    # for i in testing:
    #     if(i.endswith(".jpg")):
    #         test_chk.append("/home/teai/externalhd2/BDD100K/segmentation_v2/test/"+i)
    video_path = "./test_video_input/test_1.mp4"
    vidcap = cv2.VideoCapture(video_path)
    video_fps = math.ceil(vidcap.get(cv2.CAP_PROP_FPS))
    length = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_array = []
    # NOTE(review): original indentation was lost; the per-frame work below
    # is reconstructed as the loop body (one frame at a time is overwritten
    # at the same temp path and segmented).
    for i in tqdm(range(length - 1)):
        ret, frame = vidcap.read()
        cv2.imwrite("./test_video_input/frame.png", frame)
        test_chk = ['./test_video_input/frame.png']
        # print(type(args.test_imgs))
        list_test = [{'fpath_img': x} for x in test_chk]
        #list_test=[{'fpath_img': 'frame_143.png'},{'fpath_img': 'frame_100.png'},{'fpath_img': 'frame_1.png'}]
        #print("list_test",list_test)
        dataset_test = TestDataset(list_test, args, max_sample=args.num_val)
        loader_test = torchdata.DataLoader(
            dataset_test,
            batch_size=args.batch_size,
            shuffle=False,
            collate_fn=user_scattered_collate,
            num_workers=5,
            drop_last=True)
        segmentation_module.cuda()

        # Main loop
        # start=time.time()
        test(segmentation_module, loader_test, args)
        # end=time.time()
        # print("Time taken",(end-start))
        #print('Inference done!')
        img = cv2.imread("./test_video_output/frame.png")
        height, width, layers = img.shape
        size = (width, height)
        frame_array.append(img)

    # Stitch the segmented frames back into a video.
    out = cv2.VideoWriter("./test_video_output/test_1_sgd_100.mp4",
                          cv2.VideoWriter_fourcc(*'DIVX'), video_fps, size)
    for i in range(len(frame_array)):
        # writing to a image array
        out.write(frame_array[i])
    out.release()
# NOTE(review): fragment — the statements below reference `pred`, `seg`,
# `bridge`, `data`, `pub` and `start`, which are defined in an enclosing
# callback not visible in this chunk; indentation reconstructed.
pred, ind = torch.max(pred, dim=1)  # per-pixel argmax class index
ind = as_numpy((ind.squeeze()).cpu())
seg[:, :, 0] = ind
im = bridge.cv2_to_imgmsg(seg, "mono8")  # publish as single-channel image message
im.header = data.header                  # keep the incoming message's header/timestamp
pub.publish(im)
stop = timeit.default_timer()
print(stop - start)                      # elapsed time for this callback

# Module-level model setup: half-precision 12-class ResNet18-dilated model.
img_transform = transforms.Normalize(
    mean=[102.9801, 115.9465, 122.7717],
    std=[1., 1., 1.])
# NOTE(review): torch.cuda.set_device returns None, so `device` is None here.
device = torch.cuda.set_device(0)
builder = ModelBuilder()
net_encoder = builder.build_encoder(
    arch='resnet18dilated',
    fc_dim=512,
    weights="ckpt/baseline-resnet18dilated-c1_deepsup/encoder_epoch_2.pth")
net_decoder = builder.build_decoder(
    arch='c1_deepsup',
    fc_dim=512,
    num_class=12,
    weights="ckpt/baseline-resnet18dilated-c1_deepsup/decoder_epoch_2.pth",
    use_softmax=True)
crit = nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
segmentation_module.half()  # run inference in fp16
def main(cfg, gpus):
    # Video semantic-segmentation training driver: dispatches on
    # args.method to build the requested temporal model, then trains.
    # NOTE(review): `args` is read from enclosing/global scope, and the
    # `gpus` parameter is unused — device ids come from args.start_gpu /
    # args.gpu_num.

    # Network Builders
    label_num_ = args.num_class
    if args.method == 'tdnet':
        # tdnet builds its own backbone with an OHEM loss.
        n_img_per_gpu = int(args.batchsize / args.gpu_num)
        n_min = n_img_per_gpu * args.cropsize * args.cropsize // 16
        loss_fn = OhemCELoss2D(thresh=0.7, n_min=n_min, ignore_index=255)
        segmentation_module = td4_psp(args=args, backbone='resnet18', loss_fn=loss_fn)
        segmentation_module.pretrained_init()
    else:
        net_encoder = ModelBuilder.build_encoder(
            arch=cfg.MODEL.arch_encoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            weights=cfg.MODEL.weights_encoder,
            args=args)
        net_decoder = ModelBuilder.build_decoder(
            arch=cfg.MODEL.arch_decoder.lower(),
            fc_dim=cfg.MODEL.fc_dim,
            num_class=label_num_,
            weights=cfg.MODEL.weights_decoder)
        crit = nn.NLLLoss(ignore_index=255)
        # Wrap encoder/decoder in the method-specific temporal module.
        if args.method == 'netwarp':
            segmentation_module = NetWarp(net_encoder, net_decoder, crit,
                                          args, cfg.TRAIN.deep_sup_scale)
        elif args.method == 'ETC':
            segmentation_module = ETC(net_encoder, net_decoder, crit,
                                      args, cfg.TRAIN.deep_sup_scale)
        elif args.method == 'nonlocal3d':
            segmentation_module = Non_local3d(args, net_encoder, crit)
        elif args.method == 'our_warp':
            if args.deepsup_scale > 0.:
                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args, args.deepsup_scale)
            else:
                segmentation_module = ClipWarpNet(net_encoder, net_decoder,
                                                  crit, args)
        elif args.method == 'propnet':
            segmentation_module = PropNet(net_encoder, net_decoder, crit, args,
                                          deep_sup_scale=args.deepsup_scale)
        elif args.method == 'our_warp_merge':
            segmentation_module = OurWarpMerge(net_encoder, net_decoder, crit,
                                               args, deep_sup_scale=0.4)
        elif args.method == 'clip_psp':
            segmentation_module = Clip_PSP(net_encoder, crit, args,
                                           deep_sup_scale=0.4)
        elif args.method == 'clip_ocr':
            segmentation_module = ClipOCRNet(net_encoder, crit, args,
                                             deep_sup_scale=0.4)
        elif args.method == 'netwarp_ocr':
            segmentation_module = NetWarp_ocr(net_encoder, crit, args,
                                              deep_sup_scale=0.4)
        elif args.method == 'etc_ocr':
            segmentation_module = ETC_ocr(net_encoder, crit, args,
                                          deep_sup_scale=0.4)
        else:
            raise (NotImplementedError)

    # Dataset and Loader — the PSP/OCR clip models use longer clips.
    if args.method == 'clip_psp' or args.method == 'clip_ocr':
        dataset_train = BaseDataset_longclip(args, 'train')
    else:
        dataset_train = BaseDataset_clip(args, 'train')
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batchsize,  # we have modified data_parallel
        shuffle=True,  # we do not use this param
        num_workers=args.workers,
        drop_last=True,
        pin_memory=False)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # load nets into gpu
    segmentation_module.cuda(args.start_gpu)
    optimizer = create_optimizers(segmentation_module, cfg, args)

    # Optionally resume model + optimizer state from ./resume.
    if args.resume_epoch != 0:
        to_load = torch.load(
            os.path.join('./resume',
                         'model_epoch_{}.pth'.format(args.resume_epoch)),
            map_location=torch.device("cuda:" + str(args.start_gpu)))
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in to_load.items():
            name = k[7:]  # strip the leading 'module.' prefix added by DataParallel
            new_state_dict[name] = v  # values map one-to-one onto the renamed keys
        cfg.TRAIN.start_epoch = args.resume_epoch
        segmentation_module.load_state_dict(new_state_dict)
        optimizer.load_state_dict(
            torch.load(
                os.path.join('./resume',
                             'opt_epoch_{}.pth'.format(args.resume_epoch)),
                map_location=torch.device("cuda:" + str(args.start_gpu))))
        print('resume from epoch {}'.format(args.resume_epoch))

    # Multi-GPU: wrap in DataParallel over the contiguous gpu-id range.
    if args.gpu_num > 1:
        train_gpu_ = list(range(args.gpu_num))
        train_gpu_ = [int(gpu_ + args.start_gpu) for gpu_ in train_gpu_]
        print(train_gpu_)
        segmentation_module = torch.nn.DataParallel(segmentation_module,
                                                    device_ids=train_gpu_)
        # For sync bn
        patch_replication_callback(segmentation_module)
    # print(segmentation_module)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    #if len(args.resume_dir)>0:
    #    resume_epoch = args.resume_dir.split('.')[]
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        print('Epoch {}'.format(epoch))
        #checkpoint(optimizer,segmentation_module, history, args, epoch+1)
        train(segmentation_module, loader_train, optimizer, history,
              epoch + 1, cfg, args)
        ###################
        # checkpointing every 20 epochs
        # NOTE(review): original indentation was lost; the validation call is
        # reconstructed inside the 20-epoch checkpoint branch — confirm.
        if (epoch + 1) % 20 == 0:
            checkpoint(optimizer, segmentation_module, history, args, epoch + 1)
            if args.validation:
                test(segmentation_module, args)
    # print('Training Done!')
def main():
    """Create the model and start the training.

    End-to-end driver for GTA5 -> Cityscapes domain-adaptive segmentation:
    loads the YAML config into the module-level ``args``, builds the
    encoder/decoder model, sets up the source (GTA5) and pseudo-labelled
    target (Cityscapes) loaders, then alternates a supervised step on the
    source batch with a pseudo-label / class-balance / box-attention step on
    the target batch, periodically evaluating mIoU and checkpointing.

    Relies on module-level ``args`` and many project helpers
    (``create_logger``, ``AverageMeter``, ``robust_binary_crossentropy``,
    ``fast_hist`` etc.).
    """
    with open(args.config) as f:
        # SafeLoader: configs are plain scalars/dicts; yaml.load without an
        # explicit Loader is deprecated and unsafe on untrusted input.
        config = yaml.load(f, Loader=yaml.SafeLoader)
    for k, v in config['common'].items():
        setattr(args, k, v)

    mkdirs(osp.join("logs/" + args.exp_name))
    logger = create_logger('global_logger', "logs/" + args.exp_name + '/log.txt')
    logger.info('{}'.format(args))
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    logger.info("random_scale {}".format(args.random_scale))
    logger.info("is_training {}".format(args.is_training))

    # Sizes arrive as "h,w" strings in the config.
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)

    cudnn.enabled = True
    args.snapshot_dir = args.snapshot_dir + args.exp_name
    tb_logger = SummaryWriter("logs/" + args.exp_name)

    # Spatial class prior over the 19 Cityscapes classes, normalised per
    # pixel, reshaped to (1, 19, 512, 1024) for pooling in the box loss.
    # NOTE(review): the 512x1024 layout of local.npy is assumed — confirm.
    local_array = np.load("local.npy")
    local_array = local_array[:, :, :19]
    local_array = local_array / local_array.sum(2).reshape(512, 1024, 1)
    local_array = local_array.transpose(2, 0, 1)
    local_array = torch.from_numpy(local_array)
    local_array = local_array.view(1, 19, 512, 1024)

    h, w = map(int, args.input_size_test.split(','))
    input_size_test = (h, w)
    h, w = map(int, args.com_size.split(','))
    com_size = (h, w)
    h, w = map(int, args.input_size_crop.split(','))
    input_size_crop = h, w
    h, w = map(int, args.input_size_target_crop.split(','))
    input_size_target_crop = h, w

    # ImageNet statistics shared by the train and test pipelines.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize_module = transforms_seg.Normalize(mean=mean, std=std)
    test_normalize = transforms.Normalize(mean=mean, std=std)

    test_transform = transforms.Compose([
        transforms.Resize((input_size_test[1], input_size_test[0])),
        transforms.ToTensor(),
        test_normalize])
    valloader = data.DataLoader(
        cityscapesDataSet(args.data_dir_target,
                          args.data_list_target_val,
                          crop_size=input_size_test,
                          set='train',
                          transform=test_transform),
        num_workers=args.num_workers,
        batch_size=1,
        shuffle=False,
        pin_memory=True)

    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    # np.int / np.str were removed from NumPy; the builtins are equivalent.
    mapping = np.array(info['label2train'], dtype=int)
    label_path_list_val = args.label_path_list_val
    with open(label_path_list_val, 'r') as fp:  # was leaked via bare open()
        gt_imgs_val = fp.read().splitlines()
    gt_imgs_val = [osp.join(args.data_dir_target_val, x) for x in gt_imgs_val]
    name_classes = np.array(info['label'], dtype=str)
    interp_val = nn.Upsample(size=(com_size[1], com_size[0]),
                             mode='bilinear', align_corners=True)

    ####
    # build model
    ####
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(
        arch=args.arch_encoder,
        fc_dim=args.fc_dim,
        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(
        arch=args.arch_decoder,
        fc_dim=args.fc_dim,
        num_class=args.num_classes,
        weights=args.weights_decoder,
        use_aux=True)

    # Global class-frequency prior used by the class-balance loss.
    weighted_softmax = pd.read_csv("weighted_loss.txt", header=None)
    weighted_softmax = weighted_softmax.values
    weighted_softmax = torch.from_numpy(weighted_softmax)
    weighted_softmax = weighted_softmax / torch.sum(weighted_softmax)
    weighted_softmax = weighted_softmax.cuda().float()

    model = SegmentationModule(net_encoder, net_decoder, args.use_aux)
    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
        patch_replication_callback(model)  # sync-BN across replicas
    model.cuda()

    nets = (net_encoder, net_decoder, None, None)
    optimizers = create_optimizer(nets, args)
    cudnn.enabled = True
    cudnn.benchmark = True
    model.train()

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    source_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size[1], input_size[0]]),
        transforms_seg.ToTensor(),
        normalize_module])
    target_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size_target[1], input_size_target[0]]),
        transforms_seg.ToTensor(),
        normalize_module])

    trainloader = data.DataLoader(
        GTA5DataSet(args.data_dir, args.data_list,
                    max_iters=args.num_steps * args.iter_size * args.batch_size,
                    crop_size=input_size,
                    transform=source_transform),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=5,
        pin_memory=True)
    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(
        fake_cityscapesDataSet(args.data_dir_target, args.data_list_target,
                               max_iters=args.num_steps * args.iter_size * args.batch_size,
                               crop_size=input_size_target,
                               set=args.set,
                               transform=target_transform),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=5,
        pin_memory=True)
    targetloader_iter = enumerate(targetloader)

    # reduction='none' keeps the per-pixel loss map so it can be masked
    # below ('reduce=False' is the removed legacy spelling).
    criterion_seg = torch.nn.CrossEntropyLoss(ignore_index=255,
                                              reduction='none')

    optimizer_encoder, optimizer_decoder, optimizer_disc, optimizer_reconst = optimizers

    batch_time = AverageMeter(10)
    loss_seg_value1 = AverageMeter(10)
    best_test_mIoUs = 0
    loss_seg_value2 = AverageMeter(10)
    loss_reconst_source_value = AverageMeter(10)
    loss_reconst_target_value = AverageMeter(10)
    loss_balance_value = AverageMeter(10)
    loss_eq_att_value = AverageMeter(10)
    loss_pseudo_value = AverageMeter(10)
    bounding_num = AverageMeter(10)
    pseudo_num = AverageMeter(10)
    loss_bbx_att_value = AverageMeter(10)

    for i_iter in range(args.num_steps):
        end = time.time()

        # ---- supervised step on the source (GTA5) batch ----
        _, batch = trainloader_iter.__next__()
        images, labels, _ = batch
        # 'async=True' is a SyntaxError on Python 3.7+; non_blocking is the
        # supported spelling of the same behaviour.
        images = Variable(images).cuda(non_blocking=True)
        labels = Variable(labels).cuda(non_blocking=True)
        results = model(images, labels)
        loss_seg2 = torch.mean(results[-2])  # main decoder loss
        loss_seg1 = torch.mean(results[-1])  # auxiliary (deep-sup) loss
        loss = args.lambda_trade_off * (loss_seg2 + args.lambda_seg * loss_seg1)
        loss_seg_value2.update(loss_seg2.data.cpu().numpy())
        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        optimizer_encoder.step()
        optimizer_decoder.step()

        # ---- self-training step on the target (Cityscapes) batch ----
        _, batch = targetloader_iter.__next__()
        images, fake_labels, _ = batch
        images = Variable(images).cuda(non_blocking=True)
        fake_labels = Variable(fake_labels, requires_grad=False).cuda()
        results = model(images, None)
        target_seg = results[0]

        # Hard pseudo-labels, kept only where teacher confidence clears the
        # threshold AND the weak "fake" label is valid (!= 255).
        conf_tea, pseudo_label = torch.max(
            nn.functional.softmax(target_seg, dim=1), dim=1)
        pseudo_label = pseudo_label.detach()
        loss_pseudo = criterion_seg(target_seg, pseudo_label)
        fake_mask = (fake_labels != 255).float().detach()
        conf_mask = torch.gt(conf_tea, args.conf_threshold).float().detach()
        loss_pseudo = loss_pseudo * conf_mask * fake_mask
        loss_pseudo = loss_pseudo.view(-1)
        loss_pseudo = loss_pseudo[loss_pseudo != 0]

        # Class-balance loss against the global frequency prior.
        predict_class_mean = torch.mean(
            nn.functional.softmax(target_seg, dim=1), dim=0).mean(1).mean(1)
        equalise_cls_loss = robust_binary_crossentropy(predict_class_mean,
                                                       weighted_softmax)
        equalise_cls_loss = torch.mean(equalise_cls_loss)

        # Box-level (average-pooled) pseudo-label and spatial-prior losses.
        loss_bbx_att = []
        loss_eq_att = []
        for box_size in args.box_size:
            pooling = torch.nn.AvgPool2d(box_size)
            pooling_result_i = pooling(target_seg)
            local_i = pooling(local_array).float().cuda()
            pooling_conf_mask, pooling_pseudo = torch.max(
                nn.functional.softmax(pooling_result_i, dim=1), dim=1)
            pooling_conf_mask = torch.gt(pooling_conf_mask,
                                         args.conf_threshold).float().detach()
            fake_mask_i = pooling(fake_labels.unsqueeze(1).float())
            fake_mask_i = fake_mask_i.squeeze(1)
            fake_mask_i = (fake_mask_i != 255).float().detach()
            loss_bbx_att_i = criterion_seg(pooling_result_i, pooling_pseudo)
            loss_bbx_att_i = loss_bbx_att_i * pooling_conf_mask * fake_mask_i
            loss_bbx_att_i = loss_bbx_att_i.view(-1)
            loss_bbx_att_i = loss_bbx_att_i[loss_bbx_att_i != 0]
            loss_bbx_att.append(loss_bbx_att_i)

            pooling_result_i = pooling_result_i.mean(0).unsqueeze(0)
            equalise_cls_loss_i = robust_binary_crossentropy(
                nn.functional.softmax(pooling_result_i, dim=1), local_i)
            equalise_cls_loss_i = equalise_cls_loss_i.mean(1)
            equalise_cls_loss_i = equalise_cls_loss_i * pooling_conf_mask * fake_mask_i
            equalise_cls_loss_i = equalise_cls_loss_i.view(-1)
            equalise_cls_loss_i = equalise_cls_loss_i[equalise_cls_loss_i != 0]
            loss_eq_att.append(equalise_cls_loss_i)

        if len(args.box_size) > 0:
            if args.merge_1x1:
                loss_bbx_att.append(loss_pseudo)
            loss_bbx_att = torch.cat(loss_bbx_att, dim=0)
            # 560*480: presumably a nominal crop area used to normalise the
            # surviving-pixel count — confirm against the crop config.
            bounding_num.update(loss_bbx_att.size(0) /
                                float(560 * 480 * args.batch_size))
            loss_bbx_att = torch.mean(loss_bbx_att)
            loss_eq_att = torch.cat(loss_eq_att, dim=0)
            loss_eq_att = torch.mean(loss_eq_att)
            loss_eq_att_value.update(loss_eq_att.item())
        else:
            loss_bbx_att = torch.mean(loss_pseudo)
            loss_eq_att = 0

        pseudo_num.update(loss_pseudo.size(0) /
                          float(560 * 480 * args.batch_size))
        loss_pseudo = torch.mean(loss_pseudo)
        if not args.merge_1x1:
            loss += args.lambda_pseudo * loss_pseudo
        # NOTE(review): this reassignment discards everything accumulated in
        # `loss` so far (including the += just above); preserved exactly as
        # in the original — confirm against the intended objective.
        loss = args.lambda_balance * equalise_cls_loss
        if not isinstance(loss_bbx_att, list):
            loss += args.lambda_pseudo * loss_bbx_att
            loss += args.lambda_eq * loss_eq_att

        loss_pseudo_value.update(loss_pseudo.item())
        loss_balance_value.update(equalise_cls_loss.item())
        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        optimizer_encoder.step()
        optimizer_decoder.step()

        # ---- timing / logging ----
        batch_time.update(time.time() - end)
        remain_iter = args.num_steps - i_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m), int(t_s))

        if i_iter == args.decrease_lr:
            adjust_learning_rate(optimizer_encoder, i_iter, args.lr_encoder, args)
            adjust_learning_rate(optimizer_decoder, i_iter, args.lr_decoder, args)

        if i_iter % args.print_freq == 0:
            lr_encoder = optimizer_encoder.param_groups[0]['lr']
            lr_decoder = optimizer_decoder.param_groups[0]['lr']
            logger.info('exp = {}'.format(args.snapshot_dir))
            logger.info('Iter = [{0}/{1}]\t'
                        'Time = {batch_time.avg:.3f}\t'
                        'loss_seg1 = {loss_seg1.avg:4f}\t'
                        'loss_seg2 = {loss_seg2.avg:.4f}\t'
                        'loss_reconst_source = {loss_reconst_source.avg:.4f}\t'
                        'loss_bbx_att = {loss_bbx_att.avg:.4f}\t'
                        'loss_reconst_target = {loss_reconst_target.avg:.4f}\t'
                        'loss_pseudo = {loss_pseudo.avg:.4f}\t'
                        'loss_eq_att = {loss_eq_att.avg:.4f}\t'
                        'loss_balance = {loss_balance.avg:.4f}\t'
                        'bounding_num = {bounding_num.avg:.4f}\t'
                        'pseudo_num = {pseudo_num.avg:4f}\t'
                        'lr_encoder = {lr_encoder:.8f} lr_decoder = {lr_decoder:.8f}'.format(
                            i_iter, args.num_steps,
                            batch_time=batch_time,
                            loss_seg1=loss_seg_value1,
                            loss_seg2=loss_seg_value2,
                            loss_pseudo=loss_pseudo_value,
                            loss_bbx_att=loss_bbx_att_value,
                            bounding_num=bounding_num,
                            loss_eq_att=loss_eq_att_value,
                            pseudo_num=pseudo_num,
                            loss_reconst_source=loss_reconst_source_value,
                            loss_balance=loss_balance_value,
                            loss_reconst_target=loss_reconst_target_value,
                            lr_encoder=lr_encoder,
                            lr_decoder=lr_decoder))
            logger.info("remain_time: {}".format(remain_time))
            if tb_logger is not None:
                tb_logger.add_scalar('loss_seg_value1', loss_seg_value1.avg, i_iter)
                tb_logger.add_scalar('loss_seg_value2', loss_seg_value2.avg, i_iter)
                tb_logger.add_scalar('bounding_num', bounding_num.avg, i_iter)
                tb_logger.add_scalar('pseudo_num', pseudo_num.avg, i_iter)
                tb_logger.add_scalar('loss_pseudo', loss_pseudo_value.avg, i_iter)
                tb_logger.add_scalar('lr', lr_encoder, i_iter)
                tb_logger.add_scalar('loss_balance', loss_balance_value.avg, i_iter)

        # ---- periodic validation + checkpoint ----
        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            logger.info('taking snapshot ...')
            model.eval()
            hist = np.zeros((19, 19))
            for index, batch in tqdm(enumerate(valloader)):
                with torch.no_grad():
                    image, name = batch
                    results = model(Variable(image).cuda(), None)
                    output2 = results[0]
                    pred = interp_val(output2)
                    del output2
                    pred = pred.cpu().data[0].numpy()
                    pred = pred.transpose(1, 2, 0)
                    pred = np.asarray(np.argmax(pred, axis=2), dtype=np.uint8)
                    label = np.array(Image.open(gt_imgs_val[index]))
                    label = label_mapping(label, mapping)
                    hist += fast_hist(label.flatten(), pred.flatten(), 19)
            mIoUs = per_class_iu(hist)
            for ind_class in range(args.num_classes):
                logger.info('===>' + name_classes[ind_class] + ':\t' +
                            str(round(mIoUs[ind_class] * 100, 2)))
                tb_logger.add_scalar(name_classes[ind_class] + '_mIoU',
                                     mIoUs[ind_class], i_iter)
            mIoUs = round(np.nanmean(mIoUs) * 100, 2)
            is_best_test = False
            logger.info(mIoUs)
            tb_logger.add_scalar('test mIoU', mIoUs, i_iter)
            if mIoUs > best_test_mIoUs:
                best_test_mIoUs = mIoUs
                is_best_test = True
            logger.info("best test mIoU {}".format(best_test_mIoUs))
            net_encoder, net_decoder, net_disc, net_reconst = nets
            save_checkpoint(net_encoder, 'encoder', i_iter, args, is_best_test)
            save_checkpoint(net_decoder, 'decoder', i_iter, args, is_best_test)
            is_best_test = False
            model.train()
def main(args):
    """Fine-tune a pretrained scene-parsing model on a 12-class label set.

    The encoder is frozen and the decoder's classification heads are
    replaced with fresh 12-way 1x1 convolutions, so effectively only the
    new heads (plus any decoder layers left trainable) are updated.
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder, fc_dim=args.fc_dim, weights=args.weights_encoder)
    # num_class=150: heads are first built for the original 150-class label
    # space (matching the pretrained weights), then replaced below.
    net_decoder = builder.build_decoder(arch=args.arch_decoder, fc_dim=args.fc_dim, num_class=150, weights=args.weights_decoder)

    crit = nn.NLLLoss(ignore_index=-1)  # -1 marks unlabeled pixels

    if args.arch_decoder.endswith('deepsup'):
        # Deep-supervision decoders take an extra loss-scale argument.
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    ########
    # Freeze the entire encoder; only decoder parameters keep gradients.
    for param in segmentation_module.encoder.parameters():
        param.requires_grad = False
    #for name, param in segmentation_module.decoder.named_parameters():
    #    print(name)
    #    if(name == "conv_last.weight" or name =="conv_last.bias" or name =="conv_last_deepsup.weight" or name =="conv_last_deepsup.bias"):
    #        param.requires_grad = True
    #else:
    #    param.requires_grad = False
    #print(param.requires_grad)
    # Replace both classification heads (main and deep-supervision) with
    # randomly-initialised 12-class 1x1 convolutions. Done before .cuda()
    # and optimizer creation so the new weights are included in both.
    segmentation_module.decoder.conv_last = nn.Conv2d(args.fc_dim // 4, 12, 1, 1, 0)
    #segmentation_module.decoder.conv_last.
    segmentation_module.decoder.conv_last_deepsup = nn.Conv2d(
        args.fc_dim // 4, 12, 1, 1, 0)
    ########

    # Dataset and Loader
    dataset_train = TrainDataset(
        args.list_train, args, batch_per_gpu=args.batch_size_per_gpu)

    loader_train = torchdata.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=True)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    #######
    #torch.backends.cudnn.benchmark = True
    #CUDA_LAUNCH_BLOCKING=1
    #######

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module, device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers — built over the raw nets, after head replacement.
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history, epoch, args)

        # checkpointing
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
def main(args):
    """Train the semi-supervised CityScapes pipeline (segmentation + warp)."""
    # Build the shared encoder, the segmentation decoder and the plane decoder.
    model_builder = ModelBuilder()
    net_encoder = model_builder.build_encoder(weights=args.weights_encoder)
    net_decoder_1 = model_builder.build_decoder(
        weights=args.weights_decoder, use_softmax=False)
    net_decoder_2 = model_builder.build_decoder(
        arch='c1',
        num_class=args.num_class,
        num_plane=args.num_plane,
        use_softmax=False,
        weights=args.weights_plane_net)

    # Warp application module
    warp = NovelViewHomography()

    # Criteria: NLL for segmentation (optionally class-weighted), MSE for warp.
    nll_kwargs = {'ignore_index': -1}
    if args.weighted_class:
        nll_kwargs['weight'] = args.class_weight
    crit1 = nn.NLLLoss(**nll_kwargs)
    crit2 = nn.MSELoss()

    # Datasets: the labelled subset, the full unlabelled set, and validation.
    dataset_train_sup = CityScapes(
        'train', root=args.root_cityscapes, cropSize=args.imgSize,
        max_sample=args.num_sup, is_train=1)
    dataset_train_unsup = CityScapes(
        'train', root=args.root_cityscapes, cropSize=args.imgSize,
        max_sample=-1, is_train=1)
    dataset_val = CityScapes(
        'val', root=args.root_cityscapes, cropSize=args.imgSize,
        max_sample=args.num_val, is_train=0)

    def _make_loader(dataset, batch_size, shuffle):
        # All three loaders share the worker count and drop_last setting.
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=int(args.workers),
            drop_last=True)

    loader_train_sup = _make_loader(dataset_train_sup, args.batch_size, True)
    loader_train_unsup = _make_loader(dataset_train_unsup, args.batch_size, True)
    loader_val = _make_loader(dataset_val, args.batch_size_eval, False)

    # Supervised samples count gamma-fold toward the epoch length.
    args.epoch_iters = int(
        (args.gamma * len(dataset_train_sup) + len(dataset_train_unsup))
        / args.batch_size)
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Move the networks onto the GPU(s).
    if args.num_gpus > 1:
        net_encoder = nn.DataParallel(net_encoder, device_ids=range(args.num_gpus))
        net_decoder_1 = nn.DataParallel(net_decoder_1, device_ids=range(args.num_gpus))
        net_decoder_2 = nn.DataParallel(net_decoder_2, device_ids=range(args.num_gpus))
    nets = (net_encoder, net_decoder_1, net_decoder_2, warp, crit1, crit2)
    for net in nets:
        net.cuda()

    # Set up optimizers
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {split: {'epoch': [], 'err': [], 'acc': [], 'mIoU': []}
               for split in ('train', 'val')}

    # optional initial eval
    evaluate(nets, loader_val, history, 0, args)
    for epoch in range(1, args.num_epoch + 1):
        train(nets, loader_train_sup, loader_train_unsup, optimizers,
              history, epoch, args)

        # Evaluation and visualization
        if epoch % args.eval_epoch == 0:
            evaluate(nets, loader_val, history, epoch, args)

        # checkpointing
        checkpoint(nets, history, args)

        # adjust learning rate
        adjust_learning_rate(optimizers, epoch, args)

    print('Training Done!')
def main(cfg, gpus): # Network Builders net_encoder = ModelBuilder.build_encoder( arch=cfg.MODEL.arch_encoder.lower(), fc_dim=cfg.MODEL.fc_dim, weights=cfg.MODEL.weights_encoder) net_decoder = ModelBuilder.build_decoder( arch=cfg.MODEL.arch_decoder.lower(), fc_dim=cfg.MODEL.fc_dim, num_class=cfg.DATASET.num_class, weights=cfg.MODEL.weights_decoder) crit = nn.NLLLoss(ignore_index=-1) if cfg.MODEL.arch_decoder.endswith('deepsup'): segmentation_module = SegmentationModule(net_encoder, net_decoder, crit, cfg.TRAIN.deep_sup_scale) else: segmentation_module = SegmentationModule(net_encoder, net_decoder, crit) # Dataset and Loader dataset_train = TrainDataset(cfg.DATASET.root_dataset, cfg.DATASET.list_train, cfg.DATASET, batch_per_gpu=cfg.TRAIN.batch_size_per_gpu) loader_train = torch.utils.data.DataLoader( dataset_train, batch_size=len(gpus), # we have modified data_parallel shuffle=False, # we do not use this param collate_fn=user_scattered_collate, num_workers=cfg.TRAIN.workers, drop_last=True, pin_memory=True) print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters)) # create loader iterator iterator_train = iter(loader_train) # load nets into gpu if len(gpus) > 1: segmentation_module = UserScatteredDataParallel(segmentation_module, device_ids=gpus) # For sync bn patch_replication_callback(segmentation_module) segmentation_module.cuda() # Set up optimizers nets = (net_encoder, net_decoder, crit) optimizers = create_optimizers(nets, cfg) # Main loop history = {'train': {'epoch': [], 'loss': [], 'acc': []}} for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch): train(segmentation_module, iterator_train, optimizers, history, epoch + 1, cfg) # checkpointing checkpoint(nets, history, cfg, epoch + 1) print('Training Done!')
def load_model(data_gen: AudioGenerator, model_builder: ModelBuilder): model = model_builder.model(input_shape=(None, data_gen.input_dim), output_dim=29) model.load_weights('results/' + ("Spec " if data_gen.spectrogram else "MFCC ") + model.name + '.h5') return model
def main(cfg, gpus):
    """Train the segmentation model, optionally evaluating every few epochs.

    Mirrors the standard training driver, with two extras: an OCR decoder
    uses a plain cross-entropy criterion, and (when ``cfg.TRAIN.eval`` is
    set) periodic validation checkpoints the best ``(iou + acc) / 2`` score
    under the tag ``'best_score'``.
    """
    torch.backends.cudnn.enabled = False
    # cudnn.deterministic = False
    # cudnn.enabled = True

    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    if cfg.MODEL.arch_decoder == 'ocr':
        print('Using cross entropy loss')
        crit = CrossEntropy(ignore_label=-1)
    else:
        crit = nn.NLLLoss(ignore_index=-1)

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(
            net_encoder, net_decoder, crit, cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_train = TrainDataset(
        cfg.DATASET.root_dataset,
        cfg.DATASET.list_train,
        cfg.DATASET,
        batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),
        shuffle=False,  # parameter is not used
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)

    # create loader iterator
    iterator_train = iter(loader_train)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    if cfg.TRAIN.eval:
        # Dataset and Loader for validation data
        dataset_val = ValDataset(
            cfg.DATASET.root_dataset, cfg.DATASET.list_val, cfg.DATASET)
        loader_val = torch.utils.data.DataLoader(
            dataset_val,
            batch_size=cfg.VAL.batch_size,
            shuffle=False,
            collate_fn=user_scattered_collate,
            num_workers=5,
            drop_last=True)
        iterator_val = iter(loader_val)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(
            segmentation_module, device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {
        'train': {
            'epoch': [],
            'loss': [],
            'acc': [],
            'last_score': 0,
            'best_score': cfg.TRAIN.best_score
        }
    }

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)

        # Evaluate every eval_step epochs, counted from start_epoch.
        # BUG FIX: the original tested `epoch in range(start, stop,
        # step=eval_step)`; range() accepts its step positionally only, so
        # that raised `TypeError: range() takes no keyword arguments`.
        if cfg.TRAIN.eval and (epoch - cfg.TRAIN.start_epoch) % cfg.TRAIN.eval_step == 0:
            iou, acc = evaluate(segmentation_module, iterator_val, cfg, gpus)
            history['train']['last_score'] = (iou + acc) / 2
            if history['train']['last_score'] > history['train']['best_score']:
                history['train']['best_score'] = history['train']['last_score']
                checkpoint(nets, history, cfg, 'best_score')

        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
def main(cfg, gpus):
    """Train the few-shot memory/attention segmentation model.

    Builds the query/memory encoders, attention modules, projection head and
    decoder, optionally a frozen objectness branch, then trains on episodic
    few-shot batches (VOC or COCO). Periodically checkpoints and runs an
    in-loop evaluation on the held-out fold's classes, tracking the best
    mean IoU.
    """
    # Network Builders
    torch.cuda.set_device(gpus[0])
    print('###### Create model ######')
    net_enc_query = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_enc_memory = ModelBuilder.build_encoder_memory_separate(
        arch=cfg.MODEL.arch_memory_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_memory,
        num_class=cfg.TASK.n_ways + 1,  # +1 for background
        RGB_mask_combine_val=cfg.DATASET.RGB_mask_combine_val,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate)
    net_att_query = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_query)
    net_att_memory = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.fc_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_memory)
    net_projection = ModelBuilder.build_projection(
        arch=cfg.MODEL.arch_projection,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.projection_dim,
        weights=cfg.MODEL.weights_projection)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.decoder_fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=cfg.TASK.n_ways + 1,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout)

    # Optional frozen objectness branch: only built when both weight files
    # are configured; its parameters never receive gradients.
    if cfg.MODEL.weights_objectness and cfg.MODEL.weights_objectness_decoder:
        '''net_objectness = ModelBuilder.build_objectness(
            arch='resnet50_deeplab',
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='aspp_few_shot',
            input_dim=2048,
            fc_dim=256,
            ppm_dim=256,
            num_class=2,
            weights=cfg.MODEL.weights_objectness_decoder,
            dropout_rate=0.5,
            use_dropout=True)'''
        net_objectness = ModelBuilder.build_objectness(
            arch='hrnetv2',
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='c1_nodropout',
            input_dim=720,  # presumably HRNetV2 concatenated feature width — confirm
            fc_dim=720,
            ppm_dim=256,
            num_class=2,  # binary: object vs background
            weights=cfg.MODEL.weights_objectness_decoder,
            use_dropout=False)
        for param in net_objectness.parameters():
            param.requires_grad = False
        for param in net_objectness_decoder.parameters():
            param.requires_grad = False
    else:
        net_objectness = None
        net_objectness_decoder = None

    crit = nn.NLLLoss(ignore_index=255)  # 255 marks ignored pixels

    segmentation_module = SegmentationAttentionSeparateModule(
        net_enc_query, net_enc_memory, net_att_query, net_att_memory,
        net_decoder, net_projection, net_objectness, net_objectness_decoder,
        crit,
        zero_memory=cfg.MODEL.zero_memory,
        random_memory_bias=cfg.MODEL.random_memory_bias,
        random_memory_nobias=cfg.MODEL.random_memory_nobias,
        random_scale=cfg.MODEL.random_scale,
        zero_qval=cfg.MODEL.zero_qval,
        normalize_key=cfg.MODEL.normalize_key,
        p_scalar=cfg.MODEL.p_scalar,
        memory_feature_aggregation=cfg.MODEL.memory_feature_aggregation,
        memory_noLabel=cfg.MODEL.memory_noLabel,
        mask_feat_downsample_rate=cfg.MODEL.mask_feat_downsample_rate,
        att_mat_downsample_rate=cfg.MODEL.att_mat_downsample_rate,
        objectness_feat_downsample_rate=cfg.MODEL.
        objectness_feat_downsample_rate,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate,
        mask_foreground=cfg.MODEL.mask_foreground,
        global_pool_read=cfg.MODEL.global_pool_read,
        average_memory_voting=cfg.MODEL.average_memory_voting,
        average_memory_voting_nonorm=cfg.MODEL.average_memory_voting_nonorm,
        mask_memory_RGB=cfg.MODEL.mask_memory_RGB,
        linear_classifier_support=cfg.MODEL.linear_classifier_support,
        decay_lamb=cfg.MODEL.decay_lamb,
        linear_classifier_support_only=cfg.MODEL.
        linear_classifier_support_only,
        qread_only=cfg.MODEL.qread_only,
        feature_as_key=cfg.MODEL.feature_as_key,
        objectness_multiply=cfg.MODEL.objectness_multiply)

    print('###### Load data ######')
    data_name = cfg.DATASET.name
    # Dataset factory is imported lazily per dataset choice.
    if data_name == 'VOC':
        from dataloaders.customized_objectness_debug import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized_objectness_debug import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
    else:
        raise ValueError('Wrong config for dataset!')
    # Train on this fold's classes; validate on the remaining classes.
    labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    labels_val = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    if cfg.DATASET.exclude_labels:
        exclude_labels = labels_val
    else:
        exclude_labels = []
    # NOTE: shadows any imported `transforms` module within this function.
    transforms = Compose([Resize(size=cfg.DATASET.input_size),
                          RandomMirror()])
    dataset = make_data(base_dir=cfg.DATASET.data_dir,
                        split=cfg.DATASET.data_split,
                        transforms=transforms,
                        to_tensor=ToTensorNormalize(),
                        labels=labels,
                        max_iters=cfg.TRAIN.n_iters * cfg.TRAIN.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.TRAIN.permute_labels,
                        exclude_labels=exclude_labels,
                        use_ignore=cfg.use_ignore)
    trainloader = DataLoader(dataset,
                             batch_size=cfg.TRAIN.n_batch,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)

    #segmentation_module = nn.DataParallel(segmentation_module, device_ids=gpus)
    segmentation_module.cuda()

    # Set up optimizers (objectness nets are frozen and excluded).
    nets = (net_enc_query, net_enc_memory, net_att_query, net_att_memory,
            net_decoder, net_projection, crit)
    optimizers = create_optimizers(nets, cfg)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()

    history = {'train': {'iter': [], 'loss': [], 'acc': []}}

    # Keep BN frozen when fix_bn is set; objectness branch always in eval.
    segmentation_module.train(not cfg.TRAIN.fix_bn)
    if net_objectness and net_objectness_decoder:
        net_objectness.eval()
        net_objectness_decoder.eval()

    best_iou = 0
    # main loop
    tic = time.time()

    print('###### Training ######')
    for i_iter, sample_batched in enumerate(trainloader):
        # Prepare input
        feed_dict = data_preprocess(sample_batched, cfg)
        data_time.update(time.time() - tic)
        segmentation_module.zero_grad()

        # adjust learning rate
        adjust_learning_rate(optimizers, i_iter, cfg)

        # forward pass
        #print(batch_data)
        loss, acc = segmentation_module(feed_dict)
        loss = loss.mean()
        acc = acc.mean()

        # Backward
        loss.backward()
        for optimizer in optimizers:
            if optimizer:  # skip placeholder (None) optimizers
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss and acc
        ave_total_loss.update(loss.data.item())
        ave_acc.update(acc.data.item() * 100)

        # calculate accuracy, and display
        if i_iter % cfg.TRAIN.disp_iter == 0:
            print('Iter: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}'.format(
                      i_iter, i_iter, cfg.TRAIN.n_iters,
                      batch_time.average(), data_time.average(),
                      cfg.TRAIN.running_lr_encoder,
                      cfg.TRAIN.running_lr_decoder,
                      ave_acc.average(), ave_total_loss.average()))

            history['train']['iter'].append(i_iter)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())

        if (i_iter + 1) % cfg.TRAIN.save_freq == 0:
            checkpoint(nets, history, cfg, i_iter + 1)

        # Periodic evaluation on the held-out classes.
        if (i_iter + 1) % cfg.TRAIN.eval_freq == 0:
            metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
            with torch.no_grad():
                print('----Evaluation----')
                segmentation_module.eval()
                # Decoder emits probabilities during eval; restored below.
                net_decoder.use_softmax = True
                for run in range(cfg.VAL.n_runs):
                    print(f'### Run {run + 1} ###')
                    set_seed(cfg.VAL.seed + run)

                    print(f'### Load validation data ###')
                    dataset_val = make_data(base_dir=cfg.DATASET.data_dir,
                                            split=cfg.DATASET.data_split,
                                            transforms=transforms,
                                            to_tensor=ToTensorNormalize(),
                                            labels=labels_val,
                                            max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                                            n_ways=cfg.TASK.n_ways,
                                            n_shots=cfg.TASK.n_shots,
                                            n_queries=cfg.TASK.n_queries,
                                            permute=cfg.VAL.permute_labels,
                                            exclude_labels=[])
                    if data_name == 'COCO':
                        coco_cls_ids = dataset_val.datasets[
                            0].dataset.coco.getCatIds()
                    testloader = DataLoader(dataset_val,
                                            batch_size=cfg.VAL.n_batch,
                                            shuffle=False,
                                            num_workers=1,
                                            pin_memory=True,
                                            drop_last=False)
                    # NOTE(review): prints len(dataset) — the *training*
                    # dataset; presumably len(dataset_val) was intended.
                    print(f"Total # of validation Data: {len(dataset)}")
                    #for sample_batched in tqdm.tqdm(testloader):
                    for sample_batched in testloader:
                        feed_dict = data_preprocess(sample_batched, cfg,
                                                    is_val=True)
                        if data_name == 'COCO':
                            label_ids = [
                                coco_cls_ids.index(x) + 1
                                for x in sample_batched['class_ids']
                            ]
                        else:
                            label_ids = list(sample_batched['class_ids'])
                        query_pred = segmentation_module(
                            feed_dict, segSize=cfg.DATASET.input_size)

                        metric.record(
                            np.array(query_pred.argmax(dim=1)[0].cpu()),
                            np.array(feed_dict['seg_label'][0].cpu()),
                            labels=label_ids,
                            n_run=run)

                    # Per-run scores; NOTE(review): overwritten by the
                    # aggregate call below, so they are effectively unused.
                    classIoU, meanIoU = metric.get_mIoU(
                        labels=sorted(labels_val), n_run=run)
                    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(
                        n_run=run)

                # Aggregate mean/std across all runs.
                classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
                    labels=sorted(labels_val))
                classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary(
                )

                print('----- Evaluation Result -----')
                print(f'best meanIoU mean: {best_iou}')
                print(f'meanIoU mean: {meanIoU}')
                print(f'meanIoU std: {meanIoU_std}')
                print(f'meanIoU_binary mean: {meanIoU_binary}')
                print(f'meanIoU_binary std: {meanIoU_std_binary}')

            checkpoint(nets, history, cfg, 'latest')
            if meanIoU > best_iou:
                best_iou = meanIoU
                checkpoint(nets, history, cfg, 'best')
            # Restore training mode and the decoder's logit output.
            segmentation_module.train(not cfg.TRAIN.fix_bn)
            if net_objectness and net_objectness_decoder:
                net_objectness.eval()
                net_objectness_decoder.eval()
            net_decoder.use_softmax = False

    print('Training Done!')
def main(cfg, gpus):
    """Evaluate a few-shot attention segmentation model on VOC or COCO.

    Builds the query/memory encoders, attention heads, projection and decoder
    (plus an optional frozen objectness branch), wraps them in a
    DataParallel SegmentationAttentionSeparateModule, then runs
    cfg.VAL.n_runs evaluation passes and prints per-class / mean IoU.

    Args:
        cfg: experiment configuration node (MODEL/DATASET/TASK/VAL sections).
        gpus: list of GPU device ids; gpus[0] is made the current device.
    """
    torch.cuda.set_device(gpus[0])

    # Network Builders
    net_enc_query = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_enc_memory = ModelBuilder.build_encoder_memory_separate(
        arch=cfg.MODEL.arch_memory_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_enc_memory,
        num_class=cfg.TASK.n_ways + 1,  # n foreground ways + background
        RGB_mask_combine_val=cfg.DATASET.RGB_mask_combine_val,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate)
    net_att_query = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_query)
    net_att_memory = ModelBuilder.build_attention(
        arch=cfg.MODEL.arch_attention,
        input_dim=cfg.MODEL.fc_dim,
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_att_memory)
    net_projection = ModelBuilder.build_projection(
        arch=cfg.MODEL.arch_projection,
        input_dim=cfg.MODEL.encoder_dim,
        fc_dim=cfg.MODEL.projection_dim,
        weights=cfg.MODEL.weights_projection)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.decoder_fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=cfg.TASK.n_ways + 1,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout,
        use_softmax=True)  # inference mode: decoder emits probabilities

    # Optional frozen objectness branch — only when both weight files are set.
    if cfg.MODEL.weights_objectness and cfg.MODEL.weights_objectness_decoder:
        '''net_objectness = ModelBuilder.build_objectness(
            arch='resnet50_deeplab',
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='aspp_few_shot',
            input_dim=2048,
            fc_dim=256,
            ppm_dim=256,
            num_class=2,
            weights=cfg.MODEL.weights_objectness_decoder,
            dropout_rate=0.5,
            use_dropout=True)'''
        net_objectness = ModelBuilder.build_objectness(
            arch=cfg.MODEL.arch_objectness,
            weights=cfg.MODEL.weights_objectness,
            fix_encoder=True)
        net_objectness_decoder = ModelBuilder.build_decoder(
            arch='c1_nodropout',
            input_dim=cfg.MODEL.decoder_objectness_dim,
            fc_dim=cfg.MODEL.decoder_objectness_dim,
            ppm_dim=256,
            num_class=2,  # binary objectness
            weights=cfg.MODEL.weights_objectness_decoder,
            use_dropout=False)
        # Objectness branch is frozen during evaluation.
        for param in net_objectness.parameters():
            param.requires_grad = False
        for param in net_objectness_decoder.parameters():
            param.requires_grad = False
    else:
        net_objectness = None
        net_objectness_decoder = None

    crit = nn.NLLLoss(ignore_index=255)

    segmentation_module = SegmentationAttentionSeparateModule(
        net_enc_query,
        net_enc_memory,
        net_att_query,
        net_att_memory,
        net_decoder,
        net_projection,
        net_objectness,
        net_objectness_decoder,
        crit,
        zero_memory=cfg.MODEL.zero_memory,
        zero_qval=cfg.MODEL.zero_qval,
        normalize_key=cfg.MODEL.normalize_key,
        p_scalar=cfg.MODEL.p_scalar,
        memory_feature_aggregation=cfg.MODEL.memory_feature_aggregation,
        memory_noLabel=cfg.MODEL.memory_noLabel,
        # debug mode also returns intermediate tensors (qread, qval, p, ...)
        debug=cfg.is_debug or cfg.eval_att_voting,
        mask_feat_downsample_rate=cfg.MODEL.mask_feat_downsample_rate,
        att_mat_downsample_rate=cfg.MODEL.att_mat_downsample_rate,
        objectness_feat_downsample_rate=cfg.MODEL.
        objectness_feat_downsample_rate,
        segm_downsampling_rate=cfg.DATASET.segm_downsampling_rate,
        mask_foreground=cfg.MODEL.mask_foreground,
        global_pool_read=cfg.MODEL.global_pool_read,
        average_memory_voting=cfg.MODEL.average_memory_voting,
        average_memory_voting_nonorm=cfg.MODEL.average_memory_voting_nonorm,
        mask_memory_RGB=cfg.MODEL.mask_memory_RGB,
        linear_classifier_support=cfg.MODEL.linear_classifier_support,
        decay_lamb=cfg.MODEL.decay_lamb,
        linear_classifier_support_only=cfg.MODEL.
        linear_classifier_support_only,
        qread_only=cfg.MODEL.qread_only,
        feature_as_key=cfg.MODEL.feature_as_key,
        objectness_multiply=cfg.MODEL.objectness_multiply)
    segmentation_module = nn.DataParallel(segmentation_module,
                                          device_ids=gpus)
    segmentation_module.cuda()
    segmentation_module.eval()

    print('###### Prepare data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        from dataloaders.customized import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
        split = cfg.DATASET.data_split + '2014'
        annFile = f'{cfg.DATASET.data_dir}/annotations/instances_{split}.json'
        cocoapi = COCO(annFile)  # used later to resolve image file names
    else:
        raise ValueError('Wrong config for dataset!')
    # Evaluate only on the classes held out from the training fold.
    labels = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    transforms = [Resize_test(size=cfg.DATASET.input_size)]
    transforms = Compose(transforms)

    print('###### Testing begins ######')
    metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
    with torch.no_grad():
        for run in range(cfg.VAL.n_runs):
            print(f'### Run {run + 1} ###')
            set_seed(cfg.VAL.seed + run)  # each run gets its own seed

            print(f'### Load data ###')
            dataset = make_data(base_dir=cfg.DATASET.data_dir,
                                split=cfg.DATASET.data_split,
                                transforms=transforms,
                                to_tensor=ToTensorNormalize(),
                                labels=labels,
                                max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                                n_ways=cfg.TASK.n_ways,
                                n_shots=cfg.TASK.n_shots,
                                n_queries=cfg.TASK.n_queries,
                                permute=cfg.VAL.permute_labels,
                                exclude_labels=[])
            if data_name == 'COCO':
                # Map episode class ids back to contiguous 1-based indices.
                coco_cls_ids = dataset.datasets[0].dataset.coco.getCatIds()
            testloader = DataLoader(dataset,
                                    batch_size=cfg.VAL.n_batch,
                                    shuffle=False,
                                    num_workers=1,
                                    pin_memory=True,
                                    drop_last=False)
            print(f"Total # of Data: {len(dataset)}")
            count = 0
            if cfg.multi_scale_test:
                scales = [224, 328, 424]
            else:
                scales = [328]
            for sample_batched in tqdm.tqdm(testloader):
                feed_dict = data_preprocess(sample_batched, cfg)
                if data_name == 'COCO':
                    label_ids = [
                        coco_cls_ids.index(x) + 1
                        for x in sample_batched['class_ids']
                    ]
                else:
                    label_ids = list(sample_batched['class_ids'])
                # Average predictions over the test scales.
                for q, scale in enumerate(scales):
                    if len(scales) > 1:
                        # NOTE(review): this overwrites feed_dict['img_data'],
                        # so later scales resize the already-resized image —
                        # confirm this is intended.
                        feed_dict['img_data'] = nn.functional.interpolate(
                            feed_dict['img_data'].cuda(),
                            size=(scale, scale),
                            mode='bilinear')
                    if cfg.eval_att_voting or cfg.is_debug:
                        # Debug forward also returns attention internals.
                        query_pred, qread, qval, qk_b, mk_b, mv_b, p, feature_enc, feature_memory = segmentation_module(
                            feed_dict,
                            segSize=(feed_dict['seg_label_noresize'].shape[1],
                                     feed_dict['seg_label_noresize'].shape[2]))
                        if cfg.eval_att_voting:
                            # Re-derive the prediction by letting support masks
                            # vote through the attention matrix p.
                            height, width = qread.shape[-2], qread.shape[-1]
                            assert p.shape[0] == height * width
                            img_refs_mask_resize = nn.functional.interpolate(
                                feed_dict['img_refs_mask'][0].cuda(),
                                size=(height, width),
                                mode='nearest')
                            img_refs_mask_resize_flat = img_refs_mask_resize[:, 0, :, :].view(
                                img_refs_mask_resize.shape[0], -1)
                            mask_voting_flat = torch.mm(
                                img_refs_mask_resize_flat, p)
                            mask_voting = mask_voting_flat.view(
                                mask_voting_flat.shape[0], height, width)
                            mask_voting = torch.unsqueeze(mask_voting, 0)
                            query_pred = nn.functional.interpolate(
                                mask_voting[:, 0:-1],
                                size=cfg.DATASET.input_size,
                                mode='bilinear',
                                align_corners=False)
                            if cfg.is_debug:
                                np.save(
                                    'debug/img_refs_mask-%04d-%s-%s.npy' %
                                    (count, sample_batched['query_ids'][0][0],
                                     sample_batched['support_ids'][0][0][0]),
                                    img_refs_mask_resize.detach().cpu().float(
                                    ).numpy())
                                np.save(
                                    'debug/query_pred-%04d-%s-%s.npy' %
                                    (count, sample_batched['query_ids'][0][0],
                                     sample_batched['support_ids'][0][0][0]),
                                    query_pred.detach().cpu().float().numpy())
                        if cfg.is_debug:
                            np.save(
                                'debug/qread-%04d-%s-%s.npy' %
                                (count, sample_batched['query_ids'][0][0],
                                 sample_batched['support_ids'][0][0][0]),
                                qread.detach().cpu().float().numpy())
                            np.save(
                                'debug/qval-%04d-%s-%s.npy' %
                                (count, sample_batched['query_ids'][0][0],
                                 sample_batched['support_ids'][0][0][0]),
                                qval.detach().cpu().float().numpy())
                            #np.save('debug/qk_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), qk_b.detach().cpu().float().numpy())
                            #np.save('debug/mk_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), mk_b.detach().cpu().float().numpy())
                            #np.save('debug/mv_b-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), mv_b.detach().cpu().float().numpy())
                            #np.save('debug/p-%04d-%s-%s.npy'%(count, sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), p.detach().cpu().float().numpy())
                            #np.save('debug/feature_enc-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), feature_enc[-1].detach().cpu().float().numpy())
                            #np.save('debug/feature_memory-%s-%s.npy'%(sample_batched['query_ids'][0][0], sample_batched['support_ids'][0][0][0]), feature_memory[-1].detach().cpu().float().numpy())
                    else:
                        #query_pred = segmentation_module(feed_dict, segSize=cfg.DATASET.input_size)
                        query_pred = segmentation_module(
                            feed_dict,
                            segSize=(feed_dict['seg_label_noresize'].shape[1],
                                     feed_dict['seg_label_noresize'].shape[2]))
                    # Running mean over scales.
                    if q == 0:
                        query_pred_final = query_pred / len(scales)
                    else:
                        query_pred_final += query_pred / len(scales)
                query_pred = query_pred_final
                metric.record(np.array(query_pred.argmax(dim=1)[0].cpu()),
                              np.array(
                                  feed_dict['seg_label_noresize'][0].cpu()),
                              labels=label_ids,
                              n_run=run)
                if cfg.VAL.visualize:
                    #print(as_numpy(feed_dict['seg_label'][0].cpu()).shape)
                    #print(as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())).shape)
                    #print(feed_dict['img_data'].cpu().shape)
                    query_name = sample_batched['query_ids'][0][0]
                    support_name = sample_batched['support_ids'][0][0][0]
                    if data_name == 'VOC':
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, 'JPEGImages',
                                         query_name + '.jpg'))
                    else:
                        query_name = int(query_name)
                        img_meta = cocoapi.loadImgs(query_name)[0]
                        img = imread(
                            os.path.join(cfg.DATASET.data_dir, split,
                                         img_meta['file_name']))
                    #img = imresize(img, cfg.DATASET.input_size)
                    visualize_result(
                        (img,
                         as_numpy(feed_dict['seg_label_noresize'][0].cpu()),
                         '%05d' % (count)),
                        as_numpy(np.array(query_pred.argmax(dim=1)[0].cpu())),
                        os.path.join(cfg.DIR, 'result'))
                count += 1
            # Per-run IoU (also accumulated inside `metric` for the summary).
            classIoU, meanIoU = metric.get_mIoU(labels=sorted(labels),
                                                n_run=run)
            classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(
                n_run=run)
    '''_run.log_scalar('classIoU', classIoU.tolist())
    _run.log_scalar('meanIoU', meanIoU.tolist())
    _run.log_scalar('classIoU_binary', classIoU_binary.tolist())
    _run.log_scalar('meanIoU_binary', meanIoU_binary.tolist())
    _log.info(f'classIoU: {classIoU}')
    _log.info(f'meanIoU: {meanIoU}')
    _log.info(f'classIoU_binary: {classIoU_binary}')
    _log.info(f'meanIoU_binary: {meanIoU_binary}')'''
    # Aggregate mean/std over all runs.
    classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
        labels=sorted(labels))
    classIoU_binary, classIoU_std_binary, meanIoU_binary, meanIoU_std_binary = metric.get_mIoU_binary(
    )
    print('----- Final Result -----')
    print('final_classIoU', classIoU.tolist())
    print('final_classIoU_std', classIoU_std.tolist())
    print('final_meanIoU', meanIoU.tolist())
    print('final_meanIoU_std', meanIoU_std.tolist())
    print('final_classIoU_binary', classIoU_binary.tolist())
    print('final_classIoU_std_binary', classIoU_std_binary.tolist())
    print('final_meanIoU_binary', meanIoU_binary.tolist())
    print('final_meanIoU_std_binary', meanIoU_std_binary.tolist())
    print(f'classIoU mean: {classIoU}')
    print(f'classIoU std: {classIoU_std}')
    print(f'meanIoU mean: {meanIoU}')
    print(f'meanIoU std: {meanIoU_std}')
    print(f'classIoU_binary mean: {classIoU_binary}')
    print(f'classIoU_binary std: {classIoU_std_binary}')
    print(f'meanIoU_binary mean: {meanIoU_binary}')
    print(f'meanIoU_binary std: {meanIoU_std_binary}')
input_file = sys.argv[1] # out_dir=sys.argv[1] # if os.path.exists(out_dir)==False: # os.mkdir(out_dir) out_file = sys.argv[2] out_warping_field_path = sys.argv[3] SEG = 300 rho = 0.1 nframe = 20 Nkeep = 5 batchsize = 8 margin = 64 # Network Builders builder = ModelBuilder() net_encoder = builder.build_encoder( arch='resnet50dilated', fc_dim=2048, weights='baseline-resnet50dilated-ppm_deepsup/encoder_epoch_20.pth') net_decoder = builder.build_decoder( arch='ppm_deepsup', fc_dim=2048, num_class=150, weights='baseline-resnet50dilated-ppm_deepsup/decoder_epoch_20.pth', use_softmax=True) crit = torch.nn.NLLLoss(ignore_index=-1) segmentation_module = SegmentationModule(net_encoder, net_decoder, crit).cuda() segmentation_module.eval() normalize = transforms.Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.])
def main(args):
    """Train (or only evaluate) the Minus/Plus audio-visual model.

    Builds the three sub-networks and the loss, wraps them for multi-GPU
    execution, creates the MUSIC mix train/val loaders, evaluates once up
    front, and — unless the trainer is in 'eval' mode — runs the epoch loop
    with periodic evaluation, checkpointing and LR decay.
    """
    # Build the sub-networks: Minus sound/frame branches and the Plus
    # sound branch (2-channel input, single output channel).
    model_builder = ModelBuilder()
    sound_net_minus = model_builder.build_sound(arch=args.arch_sound,
                                                fc_dim=args.num_channels,
                                                weights=args.weights_sound_M)
    frame_net_minus = model_builder.build_frame(arch=args.arch_frame,
                                                fc_dim=args.num_channels,
                                                pool_type=args.img_pool,
                                                weights=args.weights_frame_M)
    sound_net_plus = model_builder.build_sound(
        input_nc=2,
        arch=args.arch_sound,
        # fc_dim=args.num_channels,
        fc_dim=1,
        weights=args.weights_sound_P)
    networks = (sound_net_minus, frame_net_minus, sound_net_plus)
    criterion = model_builder.build_criterion(arch=args.loss)

    # Wrap the networks. The wrapper's forward mode picks the training
    # stage — one of ['Minus', 'Plus', 'Minus_Plus'].
    netwrapper = NetWrapper(networks, criterion, mode=args.forward_mode)
    netwrapper = torch.nn.DataParallel(netwrapper,
                                       device_ids=range(args.num_gpus))
    netwrapper.to(args.device)

    # Datasets and loaders.
    dataset_train = MUSICMixDataset(args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(args.list_val,
                                  args,
                                  max_sample=args.num_val,
                                  split='val')
    loader_train = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=int(args.workers),
                                               drop_last=True)
    loader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=2,
                                             drop_last=False)
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Optimizer and trainer wrapper.
    optimizer = MP_Trainer.create_optimizer(networks, args)
    mp_trainer = MP_Trainer(netwrapper, optimizer, args)

    # Always run one evaluation pass before any training.
    mp_trainer.evaluate(loader_val)

    if mp_trainer.mode == 'eval':
        print('Evaluation Done!')
    else:
        for epoch in range(1, args.num_epoch + 1):
            mp_trainer.epoch = epoch
            mp_trainer.train(loader_train)

            # Periodic evaluation, then checkpoint every epoch.
            if epoch % args.eval_epoch == 0:
                mp_trainer.evaluate(loader_val)
            mp_trainer.checkpoint()

            # Step-wise LR decay at the configured milestones.
            if epoch in args.lr_steps:
                mp_trainer.adjust_learning_rate()
        print('Training Done!')

    mp_trainer.writer.close()
def main(cfg, gpus):
    """Distributed mixed-precision training loop (apex amp, O1).

    Builds encoder/decoder, an NLL criterion, the distributed train loader,
    initializes amp AFTER moving the model to CUDA, optionally wraps in apex
    DistributedDataParallel, then trains for the configured epochs with
    per-epoch checkpointing.

    Args:
        cfg: experiment configuration (MODEL/DATASET/TRAIN sections plus
            sync_bn / distributed flags).
        gpus: unused here — device selection is assumed to happen in the
            caller.  TODO(review): confirm.
    """
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    crit = nn.NLLLoss(ignore_index=-1)  # -1 marks don't-care pixels

    # deepsup decoders take an extra deep-supervision scale.
    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    # Sampler shards the dataset across processes; shuffle is therefore
    # left to the sampler, not the loader.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_train)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=cfg.TRAIN.batch_size_per_gpu,
        # shuffle=(train_sampler is None),  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True,
        sampler=train_sampler)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # Model must be on CUDA before amp.initialize below.
    segmentation_module.cuda()
    if cfg.sync_bn:
        print("using apex synced BN")
        segmentation_module = apex.parallel.convert_syncbn_model(
            segmentation_module)

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Mixed precision (O1 = conservative mixed precision).
    segmentation_module, optimizers = amp.initialize(segmentation_module,
                                                     optimizers,
                                                     opt_level="O1")
    if cfg.distributed:
        # FOR DISTRIBUTED: After amp.initialize, wrap the model with
        # apex.parallel.DistributedDataParallel.
        segmentation_module = DistributedDataParallel(segmentation_module)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch + 1, cfg)

        # checkpointing
        # checkpoint(nets, history, cfg, epoch+1)
        # Apex-aware checkpoint: saves the amp-wrapped module state.
        checkpoint_apex(segmentation_module, history, cfg, epoch + 1)

    print('Training Done!')
def main(args):
    """Train a segmentation model, either an encoder/decoder pair or a U-Net.

    When ``args.unet`` is false an encoder+decoder is built; otherwise a
    (possibly partially frozen) U-Net.  Builds the augmented training loader,
    moves the module to GPU (multi-GPU via UserScatteredDataParallel), then
    runs the epoch loop with per-epoch checkpointing.

    Args:
        args: parsed command-line namespace (architecture, weights, loader
            and schedule settings).
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = None
    net_decoder = None
    unet = None
    # Idiom fix: use `not args.unet` instead of `args.unet == False`
    # (args.unet is a boolean flag).
    if not args.unet:
        net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                            fc_dim=args.fc_dim,
                                            weights=args.weights_encoder)
        net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                            fc_dim=args.fc_dim,
                                            num_class=args.num_class,
                                            weights=args.weights_decoder)
    else:
        unet = builder.build_unet(num_class=args.num_class,
                                  arch=args.unet_arch,
                                  weights=args.weights_unet)

        print("Froze the following layers: ")
        for name, p in unet.named_parameters():
            if not p.requires_grad:
                print(name)

    crit = nn.NLLLoss()
    #crit = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(50))
    #crit = nn.CrossEntropyLoss().cuda()
    #crit = nn.BCELoss()

    # deepsup decoders take an extra deep-supervision scale; the U-Net path
    # always uses the plain module.
    if args.arch_decoder.endswith('deepsup') and not args.unet:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder,
                                                 net_decoder,
                                                 crit,
                                                 is_unet=args.unet,
                                                 unet=unet)

    # Training-time augmentations (geometric + photometric).
    train_augs = Compose([
        RandomSized(224),
        RandomHorizontallyFlip(),
        RandomVerticallyFlip(),
        RandomRotate(180),
        AdjustContrast(cf=0.25),
        AdjustBrightness(bf=0.25)
    ])  #, RandomErasing()])
    #train_augs = None

    # Dataset and Loader
    dataset_train = TrainDataset(args.list_train,
                                 args,
                                 batch_per_gpu=args.batch_size_per_gpu,
                                 augmentations=train_augs)

    loader_train = data.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=False)

    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers — the tuple shape feeds create_optimizers/checkpoint.
    nets = (net_encoder, net_decoder, crit) if not args.unet else (unet, crit)
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}

    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history, epoch,
              args)

        # checkpointing
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
def main(args):
    """Legacy training entry point: build encoder/decoder, train/val loaders,
    run the epoch loop with periodic eval, checkpointing and LR stepping.

    Args:
        args: parsed command-line namespace; ``args.epoch_iters`` is set here
            as a side effect.
    """
    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        segSize=args.segSize,
                                        weights=args.weights_decoder)

    # nn.NLLLoss2d is a deprecated (now removed) alias of nn.NLLLoss, which
    # handles 2d targets identically; -1 marks don't-care pixels.
    crit = nn.NLLLoss(ignore_index=-1)

    # Dataset and Loader
    dataset_train = Dataset(args.list_train, args, flip=args.flip, is_train=1)
    dataset_val = Dataset(args.list_val,
                          args,
                          flip=args.flip,
                          max_sample=args.num_val,
                          is_train=0)
    loader_train = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=int(args.workers),
                                               drop_last=True)
    loader_val = torch.utils.data.DataLoader(dataset_val,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=2,
                                             drop_last=True)
    # Floor division replaces the old int(len/batch) — identical result for
    # non-negative sizes, without a float round-trip.
    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # load nets into gpu
    if args.num_gpus > 1:
        net_encoder = nn.DataParallel(net_encoder,
                                      device_ids=range(args.num_gpus))
        net_decoder = nn.DataParallel(net_decoder,
                                      device_ids=range(args.num_gpus))
    nets = (net_encoder, net_decoder, crit)
    for net in nets:
        net.cuda()

    # Set up optimizers
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {
        split: {
            'iter': [],
            'err': [],
            'acc': []
        }
        for split in ('train', 'val')
    }
    # initial eval
    evaluate(nets, loader_val, history, 0, args)
    for epoch in range(1, args.num_epoch + 1):
        train(nets, loader_train, optimizers, history, epoch, args)

        # Evaluation and visualization
        if epoch % args.eval_epoch == 0:
            evaluate(nets, loader_val, history, epoch, args)

        # checkpointing
        if epoch % args.ckpt_epoch == 0:
            checkpoint(nets, history, epoch, args)

        # adjust learning rate
        if epoch % args.lr_step == 0:
            adjust_learning_rate(optimizers, args)

    print('Training Done!')
def overlay(img, pred_color, blend_factor=0.3):
    """Blend a color-coded prediction onto an image and draw its edges.

    Alpha-blends ``pred_color`` over ``img`` with weight ``blend_factor``,
    then paints dilated Canny edges of the prediction in color [0, 0, 255]
    (red, assuming OpenCV's BGR channel order — TODO confirm).

    Args:
        img: H x W x 3 image array.
        pred_color: H x W x 3 color-coded prediction (uint8 expected by
            cv2.Canny — TODO confirm at call sites).
        blend_factor: weight of the prediction in the blend.

    Returns:
        The blended image with edge pixels recolored.
    """
    edges = cv2.Canny(pred_color, 20, 40)
    # Thicken the 1-px Canny edges with a 5x5 dilation.
    edges = cv2.dilate(edges, np.ones((5, 5), np.uint8), iterations=1)
    out = (1 - blend_factor) * img + blend_factor * pred_color
    edge_pixels = (edges == 255)
    new_color = [0, 0, 255]
    # Recolor edge pixels channel by channel.
    for i in range(0, 3):
        timg = out[:, :, i]
        timg[edge_pixels] = new_color[i]
        out[:, :, i] = timg
    return out


# ---- Inference script: load pretrained model, enumerate test images ----
# Network Builders
builder = ModelBuilder()
net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                    fc_dim=args.fc_dim,
                                    weights=args.weights_encoder)
net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                    fc_dim=args.fc_dim,
                                    num_class=args.num_class,
                                    weights=args.weights_decoder,
                                    use_softmax=True)  # inference mode

crit = nn.NLLLoss(ignore_index=-1)

# Parallel lists: input image, .pgm label output, and visualization output.
input_fns = [
    os.path.join(args.test_folder, f) for f in os.listdir(args.test_folder)
]
output_fns = [
    os.path.join(args.result, f[0:-3] + "pgm")
    for f in os.listdir(args.test_folder)
]
output_vis_fns = [
    os.path.join(args.result, "vis_" + f)
    for f in os.listdir(args.test_folder)
]

segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
segmentation_module.cuda()
segmentation_module.eval()

# Color palette for rendering class ids.
colors = loadmat('data/color150.mat')['colors']

# BGR-style means on 0-255 inputs, matching the pretrained weights.
transform = transforms.Compose([
    transforms.Normalize(mean=[102.9801, 115.9465, 122.7717],
                         std=[1., 1., 1.])
])

feed_dict = {}
for f, of, ovf in zip(input_fns, output_fns, output_vis_fns):
    print("Input: " + f)
def main():
    """ImageNet training with DALI pipelines and (optionally) apex DDP.

    Builds the model described by architectures.json for the selected
    hardware, optionally resumes from a checkpoint, creates DALI train/val
    iterators, then trains with a label-smoothed loss, validating and
    checkpointing each epoch on rank 0.

    Uses and mutates the global ``args`` namespace (gpu, world_size,
    total_batch_size, start_epoch) and the global ``best_prec1``.
    """
    global best_prec1, args

    args.gpu = 0
    args.world_size = 1

    if args.distributed:
        # One process per GPU: derive the device from the local rank.
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    args.total_batch_size = args.world_size * args.batch_size

    # create model
    if not os.path.isfile("architectures.json"):
        print("missing architectures.json!")
        return
    # `with` closes the handle (the original leaked a bare open()).
    with open('architectures.json', 'r') as arch_file:
        modelstr = json.load(arch_file)[args.hardware]
    print('current hardware:', args.hardware, 'model:', modelstr)
    mconfig = NetworkConfig(args.num_classes).build_modelconfig(
        modelstr.split(','))
    model = ModelBuilder(mconfig, args.num_classes)
    model = init_weights(model)  ## init weights with xavier
    params = split_weights(model)  ## apply no weight decay
    model = model.cuda()
    if args.distributed:
        # shared param/delay all reduce turns off bucketing in DDP, for lower
        # latency runs this can improve perf. For the older version of APEX
        # please use shared_param; for newer ones it is delay_allreduce.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    criterion_smooth = CrossEntropyLabelSmooth(args.num_classes, 0.1)
    criterion_smooth = criterion_smooth.cuda()

    optimizer = torch.optim.SGD(params,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # NOTE(review): 1251 looks like a hard-coded iterations-per-epoch —
    # confirm it matches the actual loader size.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        float(args.epochs - 5) * 1251)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,90])

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage.cuda(args.gpu))
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading code
    traindir = os.path.join(args.dataset, 'train')
    valdir = os.path.join(args.dataset, 'val')

    crop_size = 224
    val_size = 256

    pipe = HybridTrainPipe(batch_size=args.batch_size,
                           num_threads=args.workers,
                           device_id=args.local_rank,
                           data_dir=traindir,
                           crop=crop_size,
                           dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.batch_size,
                         num_threads=args.workers,
                         device_id=args.local_rank,
                         data_dir=valdir,
                         crop=crop_size,
                         size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(
        pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    total_time = AverageMeter()
    dicts = {}
    iter_per_epoch = int(train_loader._size / args.batch_size)
    print('iter_per_epoch', iter_per_epoch)
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch with the label-smoothed criterion
        avg_train_time = train(train_loader, scheduler, model,
                               criterion_smooth, optimizer, epoch)
        total_time.update(avg_train_time)
        if args.prof:
            break
        # evaluate on validation set (plain cross-entropy)
        prec1, prec5 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint (rank 0 only)
        if args.local_rank == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            dicts[epoch] = {}
            dicts[epoch]['top1'] = prec1
            dicts[epoch]['top5'] = prec5
            # Rewrite the running metrics file; `with` closes the handle
            # (the original opened a new handle every epoch and never
            # closed any of them).
            wdata = json.dumps(dicts, indent=4)
            with open(args.rootdir + "/" + args.filename + ".json",
                      'w') as fw:
                fw.write(wdata + '\n')
            save_checkpoint(model.state_dict(),
                            is_best,
                            filename=args.rootdir + "/" + args.filename +
                            ".pth.tar")
            if epoch == args.epochs - 1:
                print('##Top-1 {0}\n'
                      '##Top-5 {1}\n'
                      '##Perf {2}'.format(
                          prec1, prec5,
                          args.total_batch_size / total_time.avg))

        # reset DALI iterators
        train_loader.reset()
        val_loader.reset()
def main():
    """Create the model and start the training.

    Domain-adaptation training (GTA5 source -> Cityscapes target): loads the
    YAML config into the global ``args``, builds loggers, loaders and the
    segmentation model, then runs the iteration loop with periodic logging
    and snapshot-time validation on Cityscapes.

    NOTE(review): several latent defects are flagged inline below
    (``async=True``, undefined ``is_best_test``, unclosed file handles,
    ``yaml.load`` without a Loader, deprecated ``np.int``/``np.str``).
    """
    # NOTE(review): yaml.load without an explicit Loader is deprecated and
    # unsafe on untrusted input — prefer yaml.safe_load.
    with open(args.config) as f:
        config = yaml.load(f)
    # Flatten the 'common' section onto the global args namespace.
    for k, v in config['common'].items():
        setattr(args, k, v)
    mkdirs(osp.join("logs/" + args.exp_name))

    logger = create_logger('global_logger',
                           "logs/" + args.exp_name + '/log.txt')
    logger.info('{}'.format(args))
    ##############################

    # Dump the full (config-augmented) argument set for reproducibility.
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    logger.info("random_scale {}".format(args.random_scale))
    logger.info("is_training {}".format(args.is_training))
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    h, w = map(int, args.input_size_target.split(','))
    input_size_target = (h, w)
    print(type(input_size_target[1]))
    cudnn.enabled = True
    args.snapshot_dir = args.snapshot_dir + args.exp_name
    tb_logger = SummaryWriter("logs/" + args.exp_name)
    ##############################

    #validation data
    h, w = map(int, args.input_size_test.split(','))
    input_size_test = (h, w)
    h, w = map(int, args.com_size.split(','))
    com_size = (h, w)
    h, w = map(int, args.input_size_crop.split(','))
    input_size_crop = h, w
    h, w = map(int, args.input_size_target_crop.split(','))
    input_size_target_crop = h, w

    test_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])
    test_transform = transforms.Compose([
        transforms.Resize((input_size_test[1], input_size_test[0])),
        transforms.ToTensor(), test_normalize
    ])
    testloader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target_val,
        crop_size=input_size_test,
        set='train',
        transform=test_transform),
                                 num_workers=args.num_workers,
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)
    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    # NOTE(review): np.int / np.str below were removed in NumPy >= 1.24 —
    # use int / str (or np.int64 etc.) when touching this code.
    mapping = np.array(info['label2train'], dtype=np.int)
    label_path_list_val = args.label_path_list_val
    label_path_list_test = './dataset/cityscapes_list/label.txt'
    # NOTE(review): these open() handles are never closed — consider `with`.
    gt_imgs_val = open(label_path_list_val, 'r').read().splitlines()
    gt_imgs_val = [osp.join(args.data_dir_target_val, x) for x in gt_imgs_val]

    test1loader = data.DataLoader(cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target_test,
        crop_size=input_size_test,
        set='val',
        transform=test_transform),
                                  num_workers=args.num_workers,
                                  batch_size=1,
                                  shuffle=False,
                                  pin_memory=True)

    gt_imgs_test = open(label_path_list_test, 'r').read().splitlines()
    gt_imgs_test = [
        osp.join(args.data_dir_target_test, x) for x in gt_imgs_test
    ]

    name_classes = np.array(info['label'], dtype=np.str)
    interp_val = nn.Upsample(size=(com_size[1], com_size[0]),
                             mode='bilinear',
                             align_corners=True)

    ####
    #build model
    ####
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        num_class=args.num_classes,
                                        weights=args.weights_decoder,
                                        use_aux=True)

    model = SegmentationModule(net_encoder, net_decoder, args.use_aux)

    if args.num_gpus > 1:
        model = torch.nn.DataParallel(model)
        patch_replication_callback(model)
    model.cuda()

    nets = (net_encoder, net_decoder, None, None)
    optimizers = create_optimizer(nets, args)

    cudnn.enabled = True
    cudnn.benchmark = True
    model.train()

    # ImageNet normalization statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    source_normalize = transforms_seg.Normalize(mean=mean, std=std)

    # Padding color for geometric augs, expressed in 0-255 range.
    mean_mapping = [0.485, 0.456, 0.406]
    mean_mapping = [item * 255 for item in mean_mapping]

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    source_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size[1], input_size[0]]),
        segtransforms.RandScale((args.scale_min, args.scale_max)),
        segtransforms.RandRotate((args.rotate_min, args.rotate_max),
                                 padding=mean_mapping,
                                 ignore_label=args.ignore_label),
        #segtransforms.RandomGaussianBlur(),
        #segtransforms.RandomHorizontalFlip(),
        segtransforms.Crop([input_size_crop[1], input_size_crop[0]],
                           crop_type='rand',
                           padding=mean_mapping,
                           ignore_label=args.ignore_label),
        transforms_seg.ToTensor(), source_normalize
    ])
    target_normalize = transforms_seg.Normalize(mean=mean, std=std)
    target_transform = transforms_seg.Compose([
        transforms_seg.Resize([input_size_target[1], input_size_target[0]]),
        segtransforms.RandScale((args.scale_min, args.scale_max)),
        segtransforms.RandRotate((args.rotate_min, args.rotate_max),
                                 padding=mean_mapping,
                                 ignore_label=args.ignore_label),
        #segtransforms.RandomGaussianBlur(),
        #segtransforms.RandomHorizontalFlip(),
        segtransforms.Crop(
            [input_size_target_crop[1], input_size_target_crop[0]],
            crop_type='rand',
            padding=mean_mapping,
            ignore_label=args.ignore_label),
        transforms_seg.ToTensor(), target_normalize
    ])
    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps *
                                              args.iter_size *
                                              args.batch_size,
                                              crop_size=input_size,
                                              transform=source_transform),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  pin_memory=True)
    trainloader_iter = enumerate(trainloader)
    targetloader = data.DataLoader(fake_cityscapesDataSet(
        args.data_dir_target,
        args.data_list_target,
        max_iters=args.num_steps * args.iter_size * args.batch_size,
        crop_size=input_size_target,
        set=args.set,
        transform=target_transform),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=1,
                                   pin_memory=True)
    targetloader_iter = enumerate(targetloader)

    # implement model.optim_parameters(args) to handle different models' lr setting
    # NOTE(review): `reduce=False` is deprecated in modern torch
    # (use reduction='none'); most of these criteria are unused below.
    criterion_seg = torch.nn.CrossEntropyLoss(ignore_index=255, reduce=False)
    criterion_pseudo = torch.nn.BCEWithLogitsLoss(reduce=False).cuda()
    bce_loss = torch.nn.BCEWithLogitsLoss().cuda()
    criterion_reconst = torch.nn.L1Loss().cuda()
    criterion_soft_pseudo = torch.nn.MSELoss(reduce=False).cuda()
    criterion_box = torch.nn.CrossEntropyLoss(ignore_index=255, reduce=False)

    interp = nn.Upsample(size=(input_size[1], input_size[0]),
                         align_corners=True,
                         mode='bilinear')
    interp_target = nn.Upsample(size=(input_size_target[1],
                                      input_size_target[0]),
                                align_corners=True,
                                mode='bilinear')

    # labels for adversarial training
    source_label = 0
    target_label = 1
    optimizer_encoder, optimizer_decoder, optimizer_disc, optimizer_reconst = optimizers
    batch_time = AverageMeter(10)
    loss_seg_value1 = AverageMeter(10)
    best_mIoUs = 0        # NOTE(review): assigned but never updated below
    best_test_mIoUs = 0   # NOTE(review): assigned but never updated below
    loss_seg_value2 = AverageMeter(10)
    loss_reconst_source_value = AverageMeter(10)
    loss_reconst_target_value = AverageMeter(10)
    loss_source_disc_value = AverageMeter(10)
    loss_source_disc_adv_value = AverageMeter(10)
    loss_balance_value = AverageMeter(10)
    loss_target_disc_value = AverageMeter(10)
    loss_target_disc_adv_value = AverageMeter(10)
    loss_pseudo_value = AverageMeter(10)
    bounding_num = AverageMeter(10)
    pseudo_num = AverageMeter(10)
    loss_bbx_att_value = AverageMeter(10)
    for i_iter in range(args.num_steps):
        # train G

        # don't accumulate grads in D
        end = time.time()
        _, batch = trainloader_iter.__next__()
        images, labels, _ = batch
        # NOTE(review): `async=True` is a SyntaxError on Python >= 3.7
        # (`async` became a keyword) — modern torch uses non_blocking=True.
        images = Variable(images).cuda(async=True)
        labels = Variable(labels).cuda(async=True)
        seg, loss_seg2 = model(images, labels)
        loss_seg2 = torch.mean(loss_seg2)
        loss = args.lambda_trade_off * (loss_seg2)
        '''
        source_tensor = Variable(torch.FloatTensor(disc.size()).fill_(source_label)).cuda()
        loss_source_disc = bce_loss(disc, source_tensor)
        loss += loss_source_disc * args.lambda_disc
        '''
        # proper normalization
        #logger.info(loss_seg1.data.cpu().numpy())
        loss_seg_value2.update(loss_seg2.data.cpu().numpy())
        #loss_source_disc_value.update(loss_source_disc.data.cpu().numpy())
        # train with target
        optimizer_encoder.zero_grad()
        optimizer_decoder.zero_grad()
        loss.backward()
        #optimizer.step()
        optimizer_encoder.step()
        optimizer_decoder.step()
        #optimizer_disc.step()
        del seg, loss_seg2

        batch_time.update(time.time() - end)
        # ETA estimate from the rolling per-iteration time.
        remain_iter = args.num_steps - i_iter
        remain_time = remain_iter * batch_time.avg
        t_m, t_s = divmod(remain_time, 60)
        t_h, t_m = divmod(t_m, 60)
        remain_time = '{:02d}:{:02d}:{:02d}'.format(int(t_h), int(t_m),
                                                    int(t_s))

        adjust_learning_rate(optimizer_encoder, i_iter, args.lr_encoder, args)
        adjust_learning_rate(optimizer_decoder, i_iter, args.lr_decoder, args)
        if i_iter % args.print_freq == 0:
            lr_encoder = optimizer_encoder.param_groups[0]['lr']
            lr_decoder = optimizer_decoder.param_groups[0]['lr']
            logger.info('exp = {}'.format(args.snapshot_dir))
            logger.info(
                'Iter = [{0}/{1}]\t'
                'Time = {batch_time.avg:.3f}\t'
                'loss_seg1 = {loss_seg1.avg:4f}\t'
                'loss_seg2 = {loss_seg2.avg:.4f}\t'
                'loss_source_disc = {loss_source_disc.avg:.4f}\t'
                'loss_source_disc_adv = {loss_source_disc_adv.avg:.4f}\t'
                'loss_target_disc = {loss_target_disc.avg:.4f}\t'
                'loss_target_disc_adv = {loss_target_disc_adv.avg:.4f}\t'
                'loss_reconst_source = {loss_reconst_source.avg:.4f}\t'
                'loss_bbx_att = {loss_bbx_att.avg:.4f}\t'
                'loss_reconst_target = {loss_reconst_target.avg:.4f}\t'
                'loss_pseudo = {loss_pseudo.avg:.4f}\t'
                'loss_balance = {loss_balance.avg:.4f}\t'
                'bounding_num = {bounding_num.avg:.4f}\t'
                'pseudo_num = {pseudo_num.avg:4f}\t'
                'lr_encoder = {lr_encoder:.8f} lr_decoder = {lr_decoder:.8f}'.
                format(i_iter,
                       args.num_steps,
                       batch_time=batch_time,
                       loss_seg1=loss_seg_value1,
                       loss_seg2=loss_seg_value2,
                       loss_source_disc=loss_source_disc_value,
                       loss_pseudo=loss_pseudo_value,
                       loss_source_disc_adv=loss_source_disc_adv_value,
                       loss_bbx_att=loss_bbx_att_value,
                       bounding_num=bounding_num,
                       pseudo_num=pseudo_num,
                       loss_target_disc=loss_target_disc_value,
                       loss_target_disc_adv=loss_target_disc_adv_value,
                       loss_reconst_source=loss_reconst_source_value,
                       loss_balance=loss_balance_value,
                       loss_reconst_target=loss_reconst_target_value,
                       lr_encoder=lr_encoder,
                       lr_decoder=lr_decoder))
            logger.info("remain_time: {}".format(remain_time))
            if not tb_logger is None:
                tb_logger.add_scalar('loss_seg_value1', loss_seg_value1.avg,
                                     i_iter)
                tb_logger.add_scalar('loss_seg_value2', loss_seg_value2.avg,
                                     i_iter)
                tb_logger.add_scalar('loss_source_disc',
                                     loss_source_disc_value.avg, i_iter)
                tb_logger.add_scalar('loss_source_disc_adv',
                                     loss_source_disc_adv_value.avg, i_iter)
                tb_logger.add_scalar('loss_target_disc',
                                     loss_target_disc_value.avg, i_iter)
                tb_logger.add_scalar('loss_target_disc_adv',
                                     loss_target_disc_adv_value.avg, i_iter)
                tb_logger.add_scalar('bounding_num', bounding_num.avg, i_iter)
                tb_logger.add_scalar('pseudo_num', pseudo_num.avg, i_iter)
                tb_logger.add_scalar('loss_pseudo', loss_pseudo_value.avg,
                                     i_iter)
                tb_logger.add_scalar('lr', lr_encoder, i_iter)
                tb_logger.add_scalar('loss_balance', loss_balance_value.avg,
                                     i_iter)
        #####
        #save image result
        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            logger.info('taking snapshot ...')
            model.eval()
            val_time = time.time()
            hist = np.zeros((19, 19))  # 19 Cityscapes eval classes
            # NOTE(review): this handle is never closed/used for writing here.
            f = open(args.result_dir, 'a')
            for index, batch in tqdm(enumerate(testloader)):
                with torch.no_grad():
                    image, name = batch
                    output2, _ = model(Variable(image).cuda(), None)
                    pred = interp_val(output2)
                    del output2
                    pred = pred.cpu().data[0].numpy()
                    pred = pred.transpose(1, 2, 0)
                    pred = np.asarray(np.argmax(pred, axis=2), dtype=np.uint8)
                    label = np.array(Image.open(gt_imgs_val[index]))
                    #label = np.array(label.resize(com_size, Image.
                    label = label_mapping(label, mapping)
                    #logger.info(label.shape)
                    hist += fast_hist(label.flatten(), pred.flatten(), 19)
            mIoUs = per_class_iu(hist)
            for ind_class in range(args.num_classes):
                logger.info('===>' + name_classes[ind_class] + ':\t' +
                            str(round(mIoUs[ind_class] * 100, 2)))
                tb_logger.add_scalar(name_classes[ind_class] + '_mIoU',
                                     mIoUs[ind_class], i_iter)
            mIoUs = round(np.nanmean(mIoUs) * 100, 2)
            logger.info(mIoUs)
            tb_logger.add_scalar('val mIoU', mIoUs, i_iter)
            # NOTE(review): duplicate of the line above — likely unintended.
            tb_logger.add_scalar('val mIoU', mIoUs, i_iter)
            net_encoder, net_decoder, net_disc, net_reconst = nets
            # NOTE(review): `is_best_test` is never defined in this function —
            # this raises NameError at the first snapshot. Probably meant
            # `mIoUs > best_test_mIoUs` (see unused trackers above).
            save_checkpoint(net_encoder, 'encoder', i_iter, args,
                            is_best_test)
            save_checkpoint(net_decoder, 'decoder', i_iter, args,
                            is_best_test)
            model.train()
def main(args):
    """Train (or evaluate) the audio-visual grounding networks.

    Builds the sound/frame/grounding sub-nets, wraps them for multi-GPU
    training, and either runs a single evaluation pass (``args.mode ==
    'eval'``) or the full training loop with periodic evaluation,
    checkpointing and LR decay.
    """
    # Build the three sub-networks plus the training criterion.
    model_builder = ModelBuilder()
    sound_net = model_builder.build_sound_ground(
        arch=args.arch_sound_ground, weights=args.weights_sound_ground)
    frame_net = model_builder.build_frame_ground(
        arch=args.arch_frame_ground,
        pool_type=args.img_pool,
        weights=args.weights_frame_ground)
    grounding_net = model_builder.build_grounding(
        arch=args.arch_grounding, weights=args.weights_grounding)
    # Tuple order matters: NetWrapper / create_optimizer / checkpoint
    # all expect (sound, frame, grounding).
    nets = (sound_net, frame_net, grounding_net)
    crit = model_builder.build_criterion(arch=args.loss)

    # Datasets and loaders.
    dataset_train = MUSICMixDataset(args.list_train, args, split='train')
    dataset_val = MUSICMixDataset(
        args.list_val, args, max_sample=args.num_val, split=args.split)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=int(args.workers),
        drop_last=True)
    loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=2,
        drop_last=False)

    args.epoch_iters = len(dataset_train) // args.batch_size
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # Wrap all nets in a single module so DataParallel scatters one input.
    wrapped_net = NetWrapper(nets, crit)
    wrapped_net = torch.nn.DataParallel(
        wrapped_net, device_ids=range(args.num_gpus))
    wrapped_net.to(args.device)

    optimizer = create_optimizer(nets, args)

    # Per-split performance history, filled by train()/evaluate().
    history = {}
    for split in ('train', 'val'):
        history[split] = {'epoch': [], 'err': []}

    # Eval-only mode: single pass, then done.
    if args.mode == 'eval':
        evaluate(wrapped_net, loader_val, history, 0, args)
        print('Evaluation Done!')
        return

    # Training loop.
    for epoch in range(1, args.num_epoch + 1):
        train(wrapped_net, loader_train, optimizer, history, epoch, args)
        # Evaluation and visualization at the configured cadence.
        if epoch % args.eval_epoch == 0:
            evaluate(wrapped_net, loader_val, history, epoch, args)
        # Checkpoint every epoch (raw nets, not the DataParallel wrapper).
        checkpoint(nets, history, epoch, args)
        # Step-wise learning-rate decay.
        if epoch in args.lr_steps:
            adjust_learning_rate(optimizer, args)
    print('Training Done!')
def main(args):
    """Train a segmentation encoder/decoder pair.

    Builds the dataset first so the decoder can be sized from
    ``dataset_train.num_classes``, assembles the SegmentationModule
    (with deep supervision when the decoder arch name ends in
    'deepsup'), wraps it for multi-GPU scatter, and runs the epoch loop
    with per-epoch checkpointing.
    """
    # Dataset — created before the decoder because num_classes comes from it.
    dataset_train = Dataset(args, split_name='train',
                            batch_per_gpu=args.batch_size_per_gpu)

    # Network Builders
    builder = ModelBuilder()
    net_encoder = builder.build_encoder(arch=args.arch_encoder,
                                        fc_dim=args.fc_dim,
                                        freeze_until=args.freeze_until,
                                        weights=args.weights_encoder)
    net_decoder = builder.build_decoder(arch=args.arch_decoder,
                                        fc_dim=args.fc_dim,
                                        num_class=dataset_train.num_classes,
                                        weights=args.weights_decoder)
    # Explicitly set train mode (encoder may have frozen layers via
    # freeze_until; presumably build_encoder leaves mode unspecified —
    # TODO confirm why only the encoder needs this).
    net_encoder.train()
    # -1 is the ignore label written by the dataset for void pixels.
    crit = nn.NLLLoss(ignore_index=-1)

    if args.arch_decoder.endswith('deepsup'):
        # Deep-supervision variant takes the auxiliary-loss scale.
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit, args.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                                 crit)

    # loader — batch_size is the GPU count because the dataset itself
    # pre-batches per GPU ("we have modified data_parallel").
    loader_train = torchdata.DataLoader(
        dataset_train,
        batch_size=len(args.gpus),  # we have modified data_parallel
        shuffle=False,
        collate_fn=user_scattered_collate,
        num_workers=int(args.workers),
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(args.epoch_iters))

    # create loader iterator — shared across epochs; train() pulls
    # epoch_iters batches per call.
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(args.gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=args.gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers — crit is included so create_optimizers sees the
    # same tuple checkpoint() later receives.
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, args)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    for epoch in range(args.start_epoch, args.num_epoch + 1):
        train(segmentation_module, iterator_train, optimizers, history,
              epoch, args)
        # checkpointing
        checkpoint(nets, history, args, epoch)

    print('Training Done!')
def main(args):
    """Validate a multi-branch 3D segmentation network.

    Loads the model (optionally restoring a checkpoint), then for every
    image in the module-level ``val_dir`` list runs sliding-window
    inference over z-segments, assembles the full-volume prediction, and
    reports per-image and aggregate Enhance/Core/Whole dice scores.
    Updates ``args.best_mean`` / ``args.best_epoch`` when the mean dice
    improves.
    """
    # import network architecture
    builder = ModelBuilder()
    model = builder.build_net(
        arch=args.id,
        num_input=args.num_input,
        num_classes=args.num_classes,
        num_branches=args.num_branches,
        padding_list=args.padding_list,
        dilation_list=args.dilation_list)
    model = torch.nn.DataParallel(
        model, device_ids=list(range(args.num_gpus))).cuda()
    cudnn.benchmark = True

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            state_dict = checkpoint['state_dict']
            model.load_state_dict(state_dict)
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            raise Exception("=> No checkpoint found at '{}'".format(args.resume))

    # initialization
    margin = [args.crop_size[k] - args.center_size[k] for k in range(3)]
    num_images = len(val_dir) // args.num_input
    # BUGFIX: collect per-image scores in a list instead of writing them at
    # index i of a pre-allocated array and then slicing off the *last*
    # num_ignore rows — skipped images are not necessarily the last ones,
    # so the old truncation kept zero rows and dropped valid ones, skewing
    # the reported mean/std.
    valid_scores = []

    for i in range(num_images):
        # load the images, label and mask
        im = []
        for j in range(args.num_input):
            direct, _ = val_dir[args.num_input * i + j].split("\n")
            name = direct
            if j < args.num_input - 1:
                # modality volumes
                image = nib.load(args.root_path + direct + '.gz').get_data()
                image = np.expand_dims(image, axis=0)
                im.append(image)
                if j == 0:
                    mask = nib.load(
                        args.root_path + direct + "/mask.nii.gz").get_data()
            else:
                # last entry of each group is the label volume
                labels = nib.load(args.root_path + direct + '.gz').get_data()
        images = np.concatenate(im, axis=0).astype(float)

        # divide the input images into small image segments;
        # returns padded inputs which can be divided exactly
        image_pad, mask_pad, label_pad, num_segments, padding_index, index = \
            segment(images, mask, labels, args)

        # initialize prediction for the whole image as background
        labels_shape = list(labels.shape)
        labels_shape.append(args.num_classes)
        pred = np.zeros(labels_shape)
        pred[:, :, :, 0] = 1

        # initialize the prediction for a small segment as background
        pad_shape = [int(num_segments[k] * args.center_size[k])
                     for k in range(3)]
        pad_shape.append(args.num_classes)
        pred_pad = np.zeros(pad_shape)
        pred_pad[:, :, :, 0] = 1

        # score_per_image accumulates the sums for this image
        score_per_image = np.zeros([3, 3])

        # iterate over the z dimension
        for idz in range(num_segments[2]):
            tf = ValDataset(image_pad, label_pad, mask_pad,
                            num_segments, idz, args)
            val_loader = DataLoader(tf,
                                    batch_size=args.batch_size,
                                    shuffle=args.shuffle,
                                    num_workers=args.num_workers,
                                    pin_memory=False)
            score_seg, pred_seg = val(val_loader, model, num_segments, args)
            pred_pad[:, :, idz * args.center_size[2]:
                     (idz + 1) * args.center_size[2], :] = pred_seg
            score_per_image += score_seg

        # decide the start and end point in the original image
        for k in range(3):
            if index[0][k] == 0:
                index[0][k] = int(margin[k] / 2 - padding_index[0][k])
            else:
                index[0][k] = int(margin[k] / 2 + index[0][k])
            index[1][k] = int(min(
                index[0][k] + num_segments[k] * args.center_size[k],
                labels.shape[k]))
        dist = [index[1][k] - index[0][k] for k in range(3)]
        pred[index[0][0]:index[1][0],
             index[0][1]:index[1][1],
             index[0][2]:index[1][2]] = pred_pad[:dist[0], :dist[1], :dist[2]]

        # skip images for which any of the three regions has no voxels
        if (np.sum(score_per_image[0, :]) == 0
                or np.sum(score_per_image[1, :]) == 0
                or np.sum(score_per_image[2, :]) == 0):
            continue

        # compute the Enhance, Core and Whole dice score
        dice_score_per = [
            2 * np.sum(score_per_image[k, 2]) /
            (np.sum(score_per_image[k, 0]) + np.sum(score_per_image[k, 1]))
            for k in range(3)]
        print('Image: %d, Enhance score: %.4f, Core score: %.4f, Whole score: %.4f' %
              (i, dice_score_per[0], dice_score_per[1], dice_score_per[2]))
        valid_scores.append(dice_score_per)

    # reshape(-1, 3) keeps axis-0 statistics well-defined even when no
    # image produced a valid score (mean/std over zero rows -> nan, as
    # the original code yielded).
    dice_score = np.asarray(valid_scores, dtype=float).reshape(-1, 3)
    mean_dice = np.mean(dice_score, axis=0)
    std_dice = np.std(dice_score, axis=0)
    print('Evaluation Done!')  # fixed typo: was 'Evalution Done!'
    print('Enhance score: %.4f, Core score: %.4f, Whole score: %.4f, Mean Dice score: %.4f' %
          (mean_dice[0], mean_dice[1], mean_dice[2], np.mean(mean_dice)))
    print('Enhance std: %.4f, Core std: %.4f, Whole std: %.4f, Mean Std: %.4f' %
          (std_dice[0], std_dice[1], std_dice[2], np.mean(std_dice)))
    if np.mean(mean_dice) > args.best_mean:
        args.best_epoch = args.epoch_index
        args.best_mean = np.mean(mean_dice)
def main(cfg, gpus):
    """Train a segmentation network, optionally with a foveation module.

    Builds encoder/decoder (and, when ``cfg.MODEL.foveation`` is set,
    the foveater), creates optimizers over the applicable net tuple,
    then runs the epoch loop with a checkpoint and an in-training
    evaluation after every epoch.
    """
    # Ignore index depends on the dataset's void-label convention:
    # Cityscapes lists use 19, everything else uses -2.
    if 'CITYSCAPE' in cfg.DATASET.list_train:
        crit = nn.NLLLoss(ignore_index=19)
    else:
        crit = nn.NLLLoss(ignore_index=-2)

    # Segmentation Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder,
        dilate_rate=cfg.DATASET.segm_downsampling_rate)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)
    segmentation_module = SegmentationModule(net_encoder, net_decoder,
                                             crit, cfg)
    segmentation_module.cuda()
    nets = (net_encoder, net_decoder, crit)

    # Foveation Network Builders — when enabled, the foveater joins the
    # net tuple so create_optimizers/checkpoint_last cover it too.
    if cfg.MODEL.foveation:
        net_foveater = ModelBuilder.build_foveater(
            in_channel=cfg.MODEL.in_dim,
            out_channel=len(cfg.MODEL.patch_bank),
            len_gpus=len(gpus),
            weights=cfg.MODEL.weights_foveater,
            cfg=cfg)
        foveation_module = FovSegmentationModule(net_foveater, cfg,
                                                 len_gpus=len(gpus))
        foveation_module.cuda()
        nets = (net_encoder, net_decoder, crit, net_foveater)

    # Set up optimizers
    optimizers = create_optimizers(nets, cfg)

    # Dataset and Loader — batch_size is the GPU count because the
    # dataset is customerized pre-batched per GPU.
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # customerized pre-batched dataset
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator — shared across epochs
    iterator_train = iter(loader_train)

    # Main loop — epochs are reported 1-based (epoch + 1)
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        if cfg.MODEL.foveation:
            train(segmentation_module, iterator_train, optimizers,
                  epoch + 1, cfg, history=None,
                  foveation_module=foveation_module)
        else:
            train(segmentation_module, iterator_train, optimizers,
                  epoch + 1, cfg)
        # save checkpoint
        checkpoint_last(nets, cfg, epoch + 1)
        # eval during train — foveation mode returns two extra values
        # (results are unused here; eval_during_train presumably logs
        # them itself — TODO confirm)
        if cfg.MODEL.foveation:
            val_iou, val_acc, F_Xlr, F_Xlr_score_flat = eval_during_train(cfg)
        else:
            val_iou, val_acc = eval_during_train(cfg)
    print('Training Done!')
def main(args):
    """Run sliding-window inference over the module-level ``test_dir`` list.

    Loads the model (optionally restoring a checkpoint), assembles the
    full-volume class probabilities per image, and writes argmax
    visualizations when ``args.visualize`` is set.
    """
    # import network architecture
    builder = ModelBuilder()
    model = builder.build_net(
        arch=args.id,
        num_input=args.num_input + 1,
        num_classes=args.num_classes,
        num_branches=args.num_branches,
        padding_list=args.padding_list,
        dilation_list=args.dilation_list)
    model = torch.nn.DataParallel(
        model, device_ids=list(range(args.num_gpus))).cuda()
    cudnn.benchmark = True

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            state_dict = checkpoint['state_dict']
            model.load_state_dict(state_dict)
            print("=> Loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            raise Exception("=> No checkpoint found at '{}'".format(args.resume))

    # initialization
    # (removed dead locals: num_ignore and dice_score were declared but
    # never used in this test-only path)
    margin = [args.crop_size[k] - args.center_size[k] for k in range(3)]
    num_images = len(test_dir) // args.num_input

    for i in range(num_images):
        # load the images and mask
        im = []
        for j in range(args.num_input):
            direct, _ = test_dir[args.num_input * i + j].split("\n")
            name = direct
            image = nib.load(args.root_path + direct).get_data()
            image = np.expand_dims(image, axis=0)
            im.append(image)
            if j == 0:
                mask = nib.load(
                    args.root_path + direct + "mask/mask.nii").get_data()
        images = np.concatenate(im, axis=0).astype(float)

        # divide the input images into small image segments;
        # returns padded inputs which can be divided exactly
        image_pad, mask_pad, num_segments, padding_index, index = \
            segment(images, mask, args)

        # initialize prediction for the whole image as background
        mask_shape = list(mask.shape)
        mask_shape.append(args.num_classes)
        pred = np.zeros(mask_shape)
        pred[:, :, :, 0] = 1

        # initialize the prediction for a small segment as background
        pad_shape = [int(num_segments[k] * args.center_size[k])
                     for k in range(3)]
        pad_shape.append(args.num_classes)
        pred_pad = np.zeros(pad_shape)
        pred_pad[:, :, :, 0] = 1

        # iterate over the z dimension
        for idz in range(num_segments[2]):
            tf = TestDataset(image_pad, mask_pad, num_segments, idz, args)
            test_loader = DataLoader(tf,
                                     batch_size=args.batch_size,
                                     shuffle=args.shuffle,
                                     num_workers=args.num_workers,
                                     pin_memory=False)
            pred_seg = test(test_loader, model, num_segments, args)
            pred_pad[:, :, idz * args.center_size[2]:
                     (idz + 1) * args.center_size[2], :] = pred_seg

        # decide the start and end point in the original image
        for k in range(3):
            if index[0][k] == 0:
                index[0][k] = int(margin[k] / 2 - padding_index[0][k])
            else:
                index[0][k] = int(margin[k] / 2 + index[0][k])
            index[1][k] = int(min(
                index[0][k] + num_segments[k] * args.center_size[k],
                mask.shape[k]))
        dist = [index[1][k] - index[0][k] for k in range(3)]
        pred[index[0][0]:index[1][0],
             index[0][1]:index[1][1],
             index[0][2]:index[1][2]] = pred_pad[:dist[0], :dist[1], :dist[2]]

        if args.visualize:
            vis = np.argmax(pred, axis=3)
            # swap x/z axes back to the viewer's orientation
            vis = np.swapaxes(vis, 0, 2).astype(dtype=np.uint8)
            visualize_result(name, vis, args)

    print('Evaluation Done!')  # fixed typo: was 'Evalution Done!'
def train_model(audio_gen: AudioGenerator,
                model_builder: ModelBuilder,
                optimizer=None,
                epochs=30,
                verbose=0,
                loss_limit=400):
    """Compile and fit a CTC speech model fed by ``audio_gen``.

    Builds the acoustic model (fixed- or variable-length input depending
    on ``audio_gen.max_length``), attaches the CTC loss, trains with
    checkpointing/metrics callbacks, and returns the model's name.

    Args:
        audio_gen: generator supplying train/valid batches and metadata.
        model_builder: factory producing the acoustic model.
        optimizer: Keras optimizer; ``None`` (default) builds a fresh
            clipped SGD per call.
        epochs: number of training epochs.
        verbose: Keras verbosity; >0 also enables TerminateOnNaN.
        loss_limit: early-abort threshold passed to MetricsLogger.

    Returns:
        The model name string (e.g. "Spec <arch>").
    """
    if optimizer is None:
        # BUGFIX: the optimizer used to be a default argument
        # (optimizer=SGD(...)), which is evaluated once at definition
        # time — its internal state (momentum buffers, iteration count)
        # was then shared across every call. Build it per call instead.
        optimizer = SGD(lr=0.02, decay=1e-6, momentum=0.9,
                        nesterov=True, clipnorm=5)

    # Pick the input shape from the generator's configuration.
    input_dim = audio_gen.input_dim
    if audio_gen.max_length is None:
        model = model_builder.model(input_shape=(None, input_dim),
                                    output_dim=29)
    else:
        model = model_builder.model(
            input_shape=(audio_gen.max_length, input_dim), output_dim=29)
    model_name = ("Spec" if audio_gen.spectrogram else "MFCC") \
        + " " + model.name
    model.name = model_name
    save_model_path = model.name + ".h5"

    # calculate steps_per_epoch / validation_steps from the generator
    num_train_examples = len(audio_gen.train_audio_paths)
    steps_per_epoch = num_train_examples // audio_gen.minibatch_size
    num_valid_samples = len(audio_gen.valid_audio_paths)
    validation_steps = num_valid_samples // audio_gen.minibatch_size

    # add CTC loss to the NN; keep a handle on the pre-CTC model so its
    # summary can be printed after training
    pre_model = model
    model = add_ctc_loss(model)
    # CTC loss is implemented elsewhere, so use a dummy lambda for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=optimizer)

    # make results/ directory, if necessary
    if not os.path.exists('results'):
        os.makedirs('results')

    # callbacks: always checkpoint; verbose runs add NaN termination,
    # quiet runs add the metrics logger instead
    model_checkpoint = ModelCheckpoint(filepath='results/' + save_model_path,
                                       verbose=0, save_best_only=True)
    terminate_on_na_n = TerminateOnNaN()
    if verbose > 0:
        callbacks = [model_checkpoint, terminate_on_na_n]
    else:
        metrics_logger = MetricsLogger(model_name=model_name,
                                       n_epochs=epochs,
                                       loss_limit=loss_limit)
        callbacks = [model_checkpoint, metrics_logger]

    try:
        model.fit_generator(generator=audio_gen.next_train(),
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs,
                            validation_data=audio_gen.next_valid(),
                            validation_steps=validation_steps,
                            callbacks=callbacks,
                            verbose=verbose)
    except KeyboardInterrupt:
        # interactive abort: just clear the notebook output
        display.clear_output(wait=True)
    except Exception:
        try:
            exc_info = sys.exc_info()
        finally:
            # Display the *original* exception, then break the traceback
            # reference cycle
            traceback.print_exception(*exc_info)
            del exc_info
    finally:
        pre_model.summary()
        del pre_model
        del model
    return model_name