def main(args, logger):
    """Train and evaluate a segmentation network on the Vaihingen dataset.

    Args:
        args: parsed command-line namespace; reads gpu, root, workers,
            batch_size, lr, weight_decay, savedir, model, max_epochs.
        logger: logging.Logger used to mirror the console output.

    Side effects: creates ``args.savedir`` (and mutates ``args.savedir``
    in place), saves the best model to ``<savedir>/model.pth``.
    """
    cudnn.enabled = True  # Enables benchmark mode in cudnn, to enable the inbuilt
    cudnn.benchmark = True  # cudnn auto-tuner to find the best algorithm to use for
    # our hardware
    # Setup random seed
    # cudnn.deterministic = True # ensure consistent results
    # if benchmark = True, deterministic will be False.
    seed = random.randint(1, 10000)
    print('======>random seed {}'.format(seed))
    logger.info('======>random seed {}'.format(seed))
    random.seed(seed)  # python random seed
    np.random.seed(seed)  # set numpy random seed
    torch.manual_seed(seed)  # set random seed for cpu
    if torch.cuda.is_available():
        # torch.cuda.manual_seed(seed) # set random seed for GPU now
        torch.cuda.manual_seed_all(seed)  # set random seed for all GPU

    # Setup device
    # NOTE(review): `device` is computed but never used below because the
    # model constructors are commented out — confirm intended model.
    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")

    # setup DatasetLoader
    train_set = vaihingenloader(root=args.root, split='train')
    test_set = vaihingenloader(root=args.root, split='test')
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              drop_last=True,
                              shuffle=True,
                              **kwargs)
    test_loader = DataLoader(test_set,
                             batch_size=1,
                             drop_last=False,
                             shuffle=False,
                             **kwargs)

    # setup optimization criterion
    criterion = utils.utils.cross_entropy2d

    # setup model
    print('======> building network')
    logger.info('======> building network')
    # NOTE(review): every model constructor below is commented out, so
    # `model` is undefined and the first use (nn.DataParallel / netParams)
    # raises NameError. One of these lines must be restored before running.
    # model = FCNRes101().to(device)
    # model = FCNRes34().cuda()
    # model = UNet(n_channels=3, n_classes=6,).to(device)

    if torch.cuda.device_count() > 1:
        device_ids = list(map(int, args.gpu.split(',')))
        model = nn.DataParallel(model, device_ids=device_ids)

    print("======> computing network parameters")
    logger.info("======> computing network parameters")
    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))
    logger.info("the number of parameters: " + str(total_paramters))

    # setup optimizer
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(model.parameters(), args.lr, (0.9, 0.999), eps=1e-08, weight_decay=5e-4)

    # setup savedir
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpu) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    start_epoch = 0
    flag = True
    # running bests across epochs; model is checkpointed on best overall acc
    best_epoch = 0.
    best_overall = 0.
    best_mIoU = 0.
    best_F1 = 0.
    # NOTE(review): the outer while loop never iterates more than once —
    # `flag` is only cleared right before `break` on the final epoch.
    while flag == True:
        for epoch in range(start_epoch, args.max_epochs):
            print('======> Epoch {} starting train.'.format(epoch))
            logger.info('======> Epoch {} starting train.'.format(epoch))
            train(args, train_loader, model, criterion, optimizer, epoch,
                  logger)
            print('======> Epoch {} train finish.'.format(epoch))
            logger.info('======> Epoch {} train finish.'.format(epoch))
            # `epoch % 1 == 0` is always true: evaluation runs every epoch
            if epoch % 1 == 0 or (epoch + 1) == args.max_epochs:
                print(
                    'Now Epoch {}, starting evaluate on Test dataset.'.format(
                        epoch))
                logger.info('Now starting evaluate on Test dataset.')
                print('length of test set:', len(test_loader))
                logger.info('length of test set: {}'.format(len(test_loader)))
                score, class_iou, class_F1 = test(args, test_loader, model,
                                                  criterion, epoch, logger)
                for k, v in score.items():
                    print('{}: {:.5f}'.format(k, v))
                    logger.info('======>{0:^18} {1:^10}'.format(k, v))
                print('Now print class iou')
                for k, v in class_iou.items():
                    print('{}: {:.5f}'.format(k, v))
                    logger.info('======>{0:^18} {1:^10}'.format(k, v))
                print('Now print class_F1')
                for k, v in class_F1.items():
                    print('{}: {:.5f}'.format(k, v))
                    logger.info('======>{0:^18} {1:^10}'.format(k, v))
                # score keys include padding/tabs — they must match the
                # keys emitted by test() exactly
                if score["Mean IoU : \t"] > best_mIoU:
                    best_mIoU = score["Mean IoU : \t"]
                if score["Overall Acc : \t"] > best_overall:
                    best_overall = score["Overall Acc : \t"]
                    # save model in best overall Acc
                    model_file_name = args.savedir + '/model.pth'
                    torch.save(model.state_dict(), model_file_name)
                    best_epoch = epoch
                if score["Mean F1 : \t"] > best_F1:
                    best_F1 = score["Mean F1 : \t"]
                print(f"best mean IoU: {best_mIoU}")
                print(f"best overall : {best_overall}")
                print(f"best F1: {best_F1}")
                print(f"best epoch: {best_epoch}")
            if (epoch + 1) == args.max_epochs:
                # print('the best pred mIoU: {}'.format(best_pred))
                flag = False
                break
def main(args, logger, summary):
    """Train/evaluate a SKMT segmentation network with a fixed seed.

    Args:
        args: parsed command-line namespace; reads workers, batch_size,
            auxiliary, trunk_head, num_classes, lr, weight_decay, savedir,
            model, gpus, max_epochs.
        logger: logging.Logger for progress messages.
        summary: experiment summary helper; ``create_summary()`` must
            return a writer accepted by Trainer/Tester.

    Side effects: mutates ``args.savedir``, creates that directory, sets
    CUDA_VISIBLE_DEVICES, writes best/resume checkpoints under savedir.
    """
    # Fixed seed for reproducibility; cudnn.deterministic below trades
    # speed for repeatable convolution algorithms.
    seed = 6000
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
    # (removed a duplicated torch.manual_seed(seed) call — it was a no-op)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    train_set = SkmtDataSet(args, split='train')
    val_set = SkmtDataSet(args, split='val')
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    sampler = CustomRandomSampler(train_set, batch_size=args.batch_size)
    # NOTE(review): this appears to be a project-local BatchSampler — the
    # stock torch.utils.data.BatchSampler also requires batch_size and
    # drop_last arguments; confirm the intended class.
    batch_sampler = BatchSampler(sampler)

    def worker_init_fn(worker_id):
        # FIX: previously every worker was seeded with the same constant,
        # making all DataLoader workers emit identical random augmentation
        # streams. Offsetting by worker_id keeps runs reproducible while
        # giving each worker an independent stream.
        np.random.seed(int(seed) + worker_id)

    train_loader = DataLoader(train_set,
                              batch_sampler=batch_sampler,
                              worker_init_fn=worker_init_fn,
                              **kwargs)
    test_loader = DataLoader(val_set,
                             batch_size=1,
                             drop_last=True,
                             worker_init_fn=worker_init_fn,
                             shuffle=False,
                             **kwargs)

    logger.info('======> building network')
    # set model
    model = build_skmtnet(backbone='wide_resnet50_2',
                          auxiliary_head=args.auxiliary,
                          trunk_head=args.trunk_head,
                          num_classes=args.num_classes,
                          output_stride=32)

    logger.info("======> computing network parameters")
    total_paramters = netParams(model)
    logger.info("the number of parameters: " + str(total_paramters))

    # setup optimizer
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=args.weight_decay)

    # setup savedir
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpus) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    # setup optimization criterion
    # , weight = np.array(SkmtDataSet.CLASSES_PIXS_WEIGHTS)
    # FIX: the trunk entry previously nested a second, duplicated `trunk`
    # dict and had no `loss_weights` of its own (copy/paste damage), which
    # does not match build_criterion's auxiliary/trunk schema. Both heads
    # now use the same well-formed losses/loss_weights structure.
    CRITERION = dict(
        auxiliary=dict(
            losses=dict(
                # smoothce=dict(size_average=True),
                # iou=dict(n_classes=11)
                ce=dict(reduction='mean')
                # dice=dict(smooth=1, p=2, reduction='mean')
            ),
            loss_weights=[1]),
        trunk=dict(
            losses=dict(
                # smoothce=dict(size_average=True),
                # iou=dict(n_classes=11)
                # focal=dict(reduction='mean')
                ce=dict(reduction='mean')
                # dice=dict(smooth=1, p=2, reduction='mean')
            ),
            loss_weights=[1]))
    criterion = build_criterion(**CRITERION)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # set random seed for all GPU
        # NOTE(review): CUDA_VISIBLE_DEVICES is set after CUDA has already
        # been touched by the manual_seed calls above, so it may have no
        # effect on device selection — consider setting it before any CUDA
        # call (or before launching the process).
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        model = model.cuda()
        criterion = criterion.cuda()

    start_epoch = 0
    best_mIoU = 0.

    trainer = Trainer(args=args,
                      dataloader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      logger=logger,
                      summary=summary)
    tester = Tester(args=args,
                    dataloader=test_loader,
                    model=model,
                    criterion=criterion,
                    logger=logger,
                    summary=summary)
    writer = summary.create_summary()

    for epoch in range(start_epoch, args.max_epochs):
        trainer.train_one_epoch(epoch, writer, best_mIoU)
        # `epoch % 1 == 0` is always true: validate every epoch
        if (epoch % 1 == 0):
            Acc, mAcc, mIoU, FWIoU, tb_overall = tester.test_one_epoch(
                epoch, writer)
            new_pred = mIoU
            if new_pred > best_mIoU:
                best_mIoU = new_pred
                best_overall = tb_overall
                # save the model
                model_file_name = args.savedir + '/best_model.pth'
                state = {
                    "epoch": epoch + 1,
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "criterion": criterion.state_dict()
                }
                torch.save(state, model_file_name)
                logger.info("======>best epoch:")
                logger.info(best_overall)
            # always refresh the resume checkpoint after validation
            model_file_name = args.savedir + '/resume_model.pth'
            state = {
                "epoch": epoch + 1,
                "model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "criterion": criterion.state_dict()
            }
            torch.save(state, model_file_name)
def test_model(args):
    """
    main function for testing
    args:
       args: global arguments

    Reads (or builds) the dataset statistics cache, constructs CGNet,
    optionally restores a checkpoint, and runs evaluation on the CamVid
    validation list, printing mIoU and per-class IoU.
    """
    print("=====> Check if the cached file exists ")
    if not os.path.isfile(args.inform_data_file):
        print("%s is not found" % (args.inform_data_file))
        dataCollect = CamVidTrainInform(
            args.data_dir,
            args.classes,
            train_set_file=args.dataset_list,
            inform_data_file=args.inform_data_file
        )  #collect mean std, weigth_class information
        datas = dataCollect.collectDataAndSave()
        if datas is None:
            print('Error while pickling data. Please check.')
            exit(-1)
    else:
        print("%s exists" % (args.inform_data_file))
        datas = pickle.load(open(args.inform_data_file, "rb"))

    print(args)
    global network_type

    if args.cuda:
        print("=====> Use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "No GPU found or Wrong gpu id, please run without --cuda")

    args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True

    M = args.M
    N = args.N
    model = CGNet.Context_Guided_Network(classes=args.classes, M=M, N=N)
    network_type = "CGNet"
    print("=====> current architeture: CGNet_M%sN%s" % (M, N))
    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))
    print("data['classWeights']: ", datas['classWeights'])
    weight = torch.from_numpy(datas['classWeights'])
    print("=====> Dataset statistics")
    print("mean and std: ", datas['mean'], datas['std'])

    # define optimization criteria
    criteria = CrossEntropyLoss2d(weight, args.ignore_label)
    if args.cuda:
        model = model.cuda()
        criteria = criteria.cuda()

    # load test set
    # FIX: this is an evaluation loader — it previously used shuffle=True
    # and drop_last=True, which silently discarded the last partial batch
    # and skewed the reported metrics. Evaluation must visit every sample
    # exactly once, in order.
    # (also removed an unused `train_transform = transforms.Compose(...)`)
    testLoader = data.DataLoader(CamVidValDataSet(args.data_dir,
                                                  args.test_data_list,
                                                  f_scale=1,
                                                  mean=datas['mean']),
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 pin_memory=True,
                                 drop_last=False)

    if args.resume:
        if os.path.isfile(args.resume):
            print("=====> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            #model.load_state_dict(convert_state_dict(checkpoint['model']))
            model.load_state_dict(checkpoint['model'])
        else:
            print("=====> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    print("=====> beginning test")
    print("length of test set:", len(testLoader))
    mIOU_val, per_class_iu = test(args, testLoader, model, criteria)
    print(mIOU_val)
    print(per_class_iu)
def train_model(args):
    """
    args:
       args: global arguments

    Trains MobileNetV3-Small on Cityscapes: builds/loads dataset
    statistics, constructs loaders, optionally resumes from a checkpoint,
    trains with Adam, validates every 50 epochs, and writes a tab-separated
    log plus periodic checkpoints under args.savedir.
    """
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    print("=====> checking if inform_data_file exists")
    if not os.path.isfile(args.inform_data_file):
        print("%s is not found" % (args.inform_data_file))
        dataCollect = CityscapesTrainInform(
            args.data_dir,
            args.classes,
            train_set_file=args.dataset_list,
            inform_data_file=args.inform_data_file
        )  #collect mean std, weigth_class information
        datas = dataCollect.collectDataAndSave()
        if datas is None:
            print("error while pickling data. Please check.")
            exit(-1)
    else:
        print("find file: ", str(args.inform_data_file))
        datas = pickle.load(open(args.inform_data_file, "rb"))

    print(args)
    global network_type

    if args.cuda:
        print("=====> use gpu id: '{}'".format(args.gpus))
        # must precede any CUDA call for device masking to take effect
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "No GPU found or Wrong gpu id, please run without --cuda")

    #args.seed = random.randint(1, 10000)
    args.seed = 9830  # fixed seed for reproducible runs
    print("====> Random Seed: ", args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True

    model = MobileNetV3(model_mode="SMALL", num_classes=args.classes)
    network_type = "MobileNetV3"
    print("=====> current architeture: MobileNetV3")
    print("=====> computing network parameters")
    total_paramters = netParams(model)
    print("the number of parameters: " + str(total_paramters))
    print("data['classWeights']: ", datas['classWeights'])
    print('=====> Dataset statistics')
    print('mean and std: ', datas['mean'], datas['std'])

    # define optimization criteria (class-frequency weighted CE)
    weight = torch.from_numpy(datas['classWeights'])
    criteria = CrossEntropyLoss2d(weight)
    if args.cuda:
        criteria = criteria.cuda()
        if torch.cuda.device_count() > 1:
            print("torch.cuda.device_count()=", torch.cuda.device_count())
            args.gpu_nums = torch.cuda.device_count()
            model = torch.nn.DataParallel(
                model).cuda()  #multi-card data parallel
        else:
            print("single GPU for training")
            model = model.cuda()  #1-card data parallel

    args.savedir = (args.savedir + args.dataset + '/' + network_type + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpu_nums) + "_" +
                    str(args.train_type) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    # NOTE(review): `train_transform` is never used — the datasets receive
    # mean/scale/mirror arguments directly instead.
    train_transform = transforms.Compose([transforms.ToTensor()])
    trainLoader = data.DataLoader(CityscapesDataSet(args.data_dir,
                                                    args.train_data_list,
                                                    crop_size=input_size,
                                                    scale=args.random_scale,
                                                    mirror=args.random_mirror,
                                                    mean=datas['mean']),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True,
                                  drop_last=True)
    # NOTE(review): shuffle=True and drop_last=True on a validation loader
    # are unusual (drop_last is moot at batch_size=1) — confirm intent.
    valLoader = data.DataLoader(CityscapesValDataSet(args.data_dir,
                                                     args.val_data_list,
                                                     f_scale=1,
                                                     mean=datas['mean']),
                                batch_size=1,
                                shuffle=True,
                                num_workers=args.num_workers,
                                pin_memory=True,
                                drop_last=True)

    start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            #model.load_state_dict(convert_state_dict(checkpoint['model']))
            print("=====> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=====> no checkpoint found at '{}'".format(args.resume))

    model.train()
    cudnn.benchmark = True

    # append to an existing log, otherwise create it; both branches write
    # the full configuration dump and the column header.
    # NOTE(review): the header lists Loss(val) but no row ever logs a
    # validation loss, and the validated-row column order (mIOU_tr,
    # mIOU_val) does not match the header order — confirm desired format.
    logFileLoc = args.savedir + args.logFile
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
        logger.write("\nGlobal configuration as follows:")
        for key, value in vars(args).items():
            logger.write("\n{:16} {}".format(key, value))
        logger.write("\nParameters: %s" % (str(total_paramters)))
        logger.write(
            "\n%s\t\t%s\t\t%s\t\t%s\t\t%s\t\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Global configuration as follows:")
        for key, value in vars(args).items():
            logger.write("\n{:16} {}".format(key, value))
        logger.write("\nParameters: %s" % (str(total_paramters)))
        logger.write(
            "\n%s\t\t%s\t\t%s\t\t%s\t\t%s\t\t" %
            ('Epoch', 'Loss(Tr)', 'Loss(val)', 'mIOU (tr)', 'mIOU (val)'))
    logger.flush()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr, (0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=5e-4)
    print('=====> beginning training')
    for epoch in range(start_epoch, args.max_epochs):
        #training
        lossTr, per_class_iu_tr, mIOU_tr, lr = train(args, trainLoader, model,
                                                     criteria, optimizer,
                                                     epoch)
        #validation: only every 50th epoch (and epoch 0)
        if epoch % 50 == 0:
            mIOU_val, per_class_iu = val(args, valLoader, model, criteria)
            # record train information
            logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.7f" %
                         (epoch, lossTr, mIOU_tr, mIOU_val, lr))
            logger.flush()
            print("Epoch : " + str(epoch) + ' Details')
            print(
                "\nEpoch No.: %d\tTrain Loss = %.4f\t mIOU(tr) = %.4f\t mIOU(val) = %.4f\t lr= %.6f"
                % (epoch, lossTr, mIOU_tr, mIOU_val, lr))
        else:
            # record train information
            logger.write("\n%d\t\t%.4f\t\t%.4f\t\t%.7f" %
                         (epoch, lossTr, mIOU_tr, lr))
            logger.flush()
            print("Epoch : " + str(epoch) + ' Details')
            print(
                "\nEpoch No.: %d\tTrain Loss = %.4f\t mIOU(tr) = %.4f\t lr= %.6f"
                % (epoch, lossTr, mIOU_tr, lr))

        #save the model: every 20th epoch, plus every one of the last 10
        model_file_name = args.savedir + '/model_' + str(epoch + 1) + '.pth'
        state = {"epoch": epoch + 1, "model": model.state_dict()}
        if epoch > args.max_epochs - 10:
            torch.save(state, model_file_name)
        elif not epoch % 20:
            torch.save(state, model_file_name)

    logger.close()
def main(args, logger, summary):
    """Train and periodically evaluate a SKMT deeplab network.

    Args:
        args: parsed command-line namespace; reads workers, batch_size,
            auxiliary, num_classes, lr, weight_decay, savedir, model,
            gpus, max_epochs, show_val_interval.
        logger: logging.Logger for progress and metric output.
        summary: experiment summary helper providing ``create_summary()``.

    Side effects: mutates ``args.savedir`` and creates the directory,
    sets CUDA_VISIBLE_DEVICES, saves ``best_model.pth`` (weights only, on
    best overall accuracy) and ``model.pth`` (epoch + weights, every epoch).
    """
    cudnn.enabled = True  # Enables benchmark mode in cudnn, to enable the inbuilt
    cudnn.benchmark = True  # cudnn auto-tuner to find the best algorithm to use for
    # our hardware

    seed = random.randint(1, 10000)
    logger.info('======>random seed {}'.format(seed))
    random.seed(seed)  # python random seed
    np.random.seed(seed)  # set numpy random seed
    torch.manual_seed(seed)  # set random seed for cpu

    # train_set = VaiHinGen(root=args.root, split='trainl',outer_size=2*args.image_size,centre_size=args.image_size)
    # test_set = VaiHinGen(root=args.root, split='testl',outer_size=2*args.image_size,centre_size=args.image_size)
    train_set = SkmtDataSet(args, split='train')
    val_set = SkmtDataSet(args, split='val')
    kwargs = {'num_workers': args.workers, 'pin_memory': True}
    # NOTE(review): shuffle=False on the training loader is unusual for
    # SGD training — confirm whether the dataset is pre-shuffled.
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              drop_last=True,
                              shuffle=False,
                              **kwargs)
    # NOTE(review): drop_last=True is moot at batch_size=1.
    test_loader = DataLoader(val_set,
                             batch_size=1,
                             drop_last=True,
                             shuffle=False,
                             **kwargs)

    logger.info('======> building network')
    # set model
    model = build_skmtnet(backbone='resnet50',
                          auxiliary_head=args.auxiliary,
                          trunk_head='deeplab',
                          num_classes=args.num_classes,
                          output_stride=16)

    logger.info("======> computing network parameters")
    total_paramters = netParams(model)
    logger.info("the number of parameters: " + str(total_paramters))

    # setup optimizer
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=args.weight_decay)

    # setup savedir
    args.savedir = (args.savedir + '/' + args.model + 'bs' +
                    str(args.batch_size) + 'gpu' + str(args.gpus) + '/')
    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    # setup optimization criterion
    criterion = Loss(args)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # set random seed for all GPU
        # NOTE(review): setting CUDA_VISIBLE_DEVICES this late (after
        # torch.cuda.is_available()) may not restrict device selection —
        # confirm it still takes effect in this launch configuration.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus
        model = nn.DataParallel(model).cuda()
        criterion = criterion.cuda()

    start_epoch = 0
    # running bests across epochs; best_model.pth tracks best overall acc
    best_epoch = 0.
    best_overall = 0.
    best_mIoU = 0.
    best_F1 = 0.

    trainer = Trainer(args=args,
                      dataloader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      logger=logger,
                      summary=summary)
    tester = Tester(args=args,
                    dataloader=test_loader,
                    model=model,
                    criterion=criterion,
                    logger=logger,
                    summary=summary)
    writer = summary.create_summary()

    for epoch in range(start_epoch, args.max_epochs):
        trainer.train_one_epoch(epoch, writer)
        if (epoch % args.show_val_interval == 0):
            score, class_iou, class_acc, class_F1 = tester.test_one_epoch(
                epoch, writer)
            logger.info('======>Now print overall info:')
            for k, v in score.items():
                logger.info('======>{0:^18} {1:^10}'.format(k, v))
            logger.info('======>Now print class acc')
            for k, v in class_acc.items():
                print('{}: {:.5f}'.format(k, v))
                logger.info('======>{0:^18} {1:^10}'.format(k, v))
            logger.info('======>Now print class iou')
            for k, v in class_iou.items():
                print('{}: {:.5f}'.format(k, v))
                logger.info('======>{0:^18} {1:^10}'.format(k, v))
            logger.info('======>Now print class_F1')
            for k, v in class_F1.items():
                logger.info('======>{0:^18} {1:^10}'.format(k, v))
            # score keys include padding/tabs — they must match the keys
            # emitted by tester.test_one_epoch exactly
            if score["Mean IoU(8) : \t"] > best_mIoU:
                best_mIoU = score["Mean IoU(8) : \t"]
            if score["Overall Acc : \t"] > best_overall:
                best_overall = score["Overall Acc : \t"]
                # save model in best overall Acc (weights only)
                model_file_name = args.savedir + '/best_model.pth'
                torch.save(model.state_dict(), model_file_name)
                best_epoch = epoch
            if score["Mean F1 : \t"] > best_F1:
                best_F1 = score["Mean F1 : \t"]
            logger.info("======>best mean IoU:{}".format(best_mIoU))
            logger.info("======>best overall : {}".format(best_overall))
            logger.info("======>best F1: {}".format(best_F1))
            logger.info("======>best epoch: {}".format(best_epoch))

        # save the model (resume-style dict, overwritten every epoch)
        model_file_name = args.savedir + '/model.pth'
        state = {"epoch": epoch + 1, "model": model.state_dict()}
        logger.info('======> Now begining to save model.')
        torch.save(state, model_file_name)
        logger.info('======> Save done.')