def main():
    """Entry point for test-set generation.

    Builds the model from the module-level ``cfg``, optionally restores a
    checkpoint, moves the model to the configured device, builds the test
    data loader and runs ``generate``.

    Relies on module-level names: ``cfg``, ``model_builder``, ``torch``,
    ``utils``, ``data_loader``, ``generate``.
    """
    # Build model.
    model = model_builder.build_model(cfg=cfg)
    # Read checkpoint. Loaded on CPU so it works regardless of GPU layout.
    ckpt = torch.load(
        cfg.MODEL.PATH2CKPT,
        map_location=torch.device("cpu")) if cfg.GENERAL.RESUME else {}
    if cfg.GENERAL.RESUME:
        with utils.log_info(msg="Load pre-trained model.", level="INFO",
                            state=True):
            model.load_state_dict(ckpt["model"])
    # Set device.
    model, device = utils.set_device(model, cfg.GENERAL.GPU)
    # Keep the try body minimal: only loader construction should be blamed
    # for "can not build data loader". Previously a bare `except:` also
    # wrapped generate(), mislabeling any generation failure and re-raising
    # ValueError("") with an empty message.
    try:
        test_data_loader = data_loader.build_data_loader(
            cfg, cfg.DATA.DATASET, "test")
    except Exception as exc:
        utils.notify("Can not build data loader for test set.", level="ERROR")
        raise ValueError("Can not build data loader for test set.") from exc
    generate(cfg=cfg, model=model, data_loader=test_data_loader,
             device=device)
def create_model_ops(model, loss_scale):
    """Build the audio-visual video model ops via ``model_builder``.

    Thin wrapper that forwards the module-level command-line ``args``
    (model architecture, input geometry, gradient-blending weights) to
    ``model_builder.build_model``.

    Args:
        model: the model/workspace object the builder attaches ops to.
        loss_scale: loss scaling factor forwarded to the builder
            (typically 1.0 / number of devices for data-parallel runs).

    Returns:
        Whatever ``model_builder.build_model`` returns.
    """
    return model_builder.build_model(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        batch_size=args.batch_size,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        # Optical-flow inputs use their own clip length; otherwise RGB.
        clip_length=(args.clip_length_of
                     if args.input_type else args.clip_length_rgb),
        loss_scale=loss_scale,
        pred_layer_name=args.pred_layer_name,
        multi_label=args.multi_label,
        channel_multiplier=args.channel_multiplier,
        bottleneck_multiplier=args.bottleneck_multiplier,
        use_dropout=args.use_dropout,
        conv1_temporal_stride=args.conv1_temporal_stride,
        conv1_temporal_kernel=args.conv1_temporal_kernel,
        use_pool1=args.use_pool1,
        audio_input_3d=args.audio_input_3d,
        # Gradient-blending / modality weighting options.
        g_blend=args.g_blend,
        audio_weight=args.audio_weight,
        visual_weight=args.visual_weight,
        av_weight=args.av_weight,
    )
def export_inference_model(args):
    """Export a PaddlePaddle inference model for prediction deployment
    and serving.

    Builds the PREDICT-phase graph, loads parameters from
    ``cfg.TEST.TEST_MODEL`` and saves a frozen inference model (plus its
    deploy config) under ``cfg.FREEZE.SAVE_DIR``. Exits the process with
    status -1 if the test-model directory does not exist.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    # PREDICT phase returns the input image variable and the logits output.
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)
    # Use CPU for exporting inference model instead of GPU.
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    # Freeze the graph for inference (prunes train-only ops).
    infer_prog = infer_prog.clone(for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_params(exe, cfg.TEST.TEST_MODEL,
                             main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)
    # Persist graph + weights using the names configured under cfg.FREEZE.
    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
def init_lanenet(self):
    """Initialize the PaddlePaddle LaneNet model for inference.

    Builds the VISUAL-phase graph, loads weights from
    ``self.weight_path`` and creates the post-processor. Stores the
    prediction/logit variables on ``self.pred`` / ``self.logit`` and the
    executor on ``self.exe``.
    """
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    self.pred, self.logit = build_model(test_prog,
                                        startup_prog,
                                        phase=ModelPhase.VISUAL)
    # Clone forward graph for inference.
    # NOTE(review): the cloned test_prog is a local and is never stored on
    # self — confirm a later step does not need it.
    test_prog = test_prog.clone(for_test=True)
    # Get device environment (module-level `use_gpu` flag).
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    self.exe = fluid.Executor(place)
    self.exe.run(startup_prog)
    ckpt_dir = self.weight_path
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), self.exe)
        except Exception:
            # Fall back to the legacy parameter format. Was a bare
            # `except:`, which would also swallow KeyboardInterrupt and
            # SystemExit.
            fluid.io.load_params(self.exe, ckpt_dir, main_program=test_prog)
    self.postprocessor = lanenet_postprocess.LaneNetPostProcessor()
def create_model_ops(model, loss_scale):
    """Construct model ops by delegating to ``model_builder.build_model``.

    Forwards the module-level CLI ``args`` describing the architecture and
    input geometry, together with the caller-supplied loss scale.

    Args:
        model: model/workspace object the builder attaches ops to.
        loss_scale: loss scaling factor passed straight through.

    Returns:
        The result of ``model_builder.build_model``.
    """
    # Optical-flow inputs carry their own clip length; RGB otherwise.
    frames_per_clip = (args.clip_length_of
                       if args.input_type
                       else args.clip_length_rgb)
    builder_kwargs = dict(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        clip_length=frames_per_clip,
        loss_scale=loss_scale,
        pred_layer_name=args.pred_layer_name,
    )
    return model_builder.build_model(**builder_kwargs)
def export_inference_model(args):
    """Export a quantization-aware PaddlePaddle inference model for
    prediction deployment and serving.

    Builds the PREDICT-phase graph, applies PaddleSlim quant-aware
    rewriting, loads persistables from ``cfg.TEST.TEST_MODEL``, converts
    the graph to its final quantized form and saves it under
    ``cfg.FREEZE.SAVE_DIR``. Exits with status -1 if the test-model
    directory does not exist.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)
    # Use CPU for exporting inference model instead of GPU.
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)
    # Ops matching these patterns are excluded from quantization.
    not_quant_pattern_list = []
    if args.not_quant_pattern is not None:
        not_quant_pattern_list = args.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }
    # Insert fake-quant ops so the quant-trained weights can be loaded.
    infer_prog = quant_aware(infer_prog, place, config, for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        # load_persistables (not load_params): quant scales are persistable
        # variables, not parameters.
        fluid.io.load_persistables(exe,
                                   cfg.TEST.TEST_MODEL,
                                   main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)
    # Strip fake-quant ops and emit the final quantized inference graph.
    infer_prog = convert(infer_prog, place, config)
    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
def start_training(EPOCHS, device, train_loader, test_loader, **models_dict):
    """Train every model described in ``models_dict`` and collect results.

    Each keyword argument maps a model-type name to the kwargs passed to
    ``build_model``. Results are accumulated per model type.

    Args:
        EPOCHS: number of epochs forwarded to ``build_model``.
        device: compute device forwarded to ``build_model``.
        train_loader / test_loader: data loaders forwarded to ``build_model``.
        **models_dict: mapping of model-type name -> build_model kwargs.

    Returns:
        dict mapping model_type to
        [train_accs, train_losses, test_acc, test_losses, best_model].
    """
    results = {}
    logger.info("\n**** Started training ****\n")
    for model_type, build_kwargs in models_dict.items():
        logger.info(f"\nModel: {model_type}\n")
        outcome = build_model(EPOCHS, device, train_loader, test_loader,
                              **build_kwargs)
        train_accs, train_losses, test_acc, test_losses, best_model = outcome
        results[model_type] = [
            train_accs, train_losses, test_acc, test_losses, best_model
        ]
        logger.info(f"\nresults : {results}\n")
        # Pause between models (present in the original flow).
        time.sleep(10)
    logger.info("\n**** Ended training ****\n")
    return results
def export_serving_model(args):
    """Export a PaddlePaddle model for Paddle Serving deployment.

    Builds the PREDICT-phase graph, loads weights from
    ``cfg.TEST.TEST_MODEL`` and writes serving-server / serving-client
    bundles under ``cfg.FREEZE.SAVE_DIR``. Exits with status -1 if the
    test-model directory does not exist.
    """
    print("Exporting serving model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)
    # Use CPU for exporting inference model instead of GPU.
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        print('load test model:', cfg.TEST.TEST_MODEL)
        try:
            # Prefer the unified checkpoint format...
            fluid.load(infer_prog,
                       os.path.join(cfg.TEST.TEST_MODEL, 'model'), exe)
        except:
            # ...falling back to legacy per-parameter files.
            fluid.io.load_params(exe,
                                 cfg.TEST.TEST_MODEL,
                                 main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)
    # Local import keeps paddle_serving_client an optional dependency.
    from paddle_serving_client.io import save_model
    save_model(
        cfg.FREEZE.SAVE_DIR + "/serving_server",
        cfg.FREEZE.SAVE_DIR + "/serving_client",
        {image.name: image},
        {logit_out.name: logit_out},
        infer_prog,
    )
    print("Serving model exported!")
    print("Exporting serving model config...")
    deploy_cfg_path = export_inference_config()
    print("Serving model saved : [%s]" % (deploy_cfg_path))
def create_model_ops(model, loss_scale):
    """Build test-mode video model ops via ``model_builder``.

    Evaluation variant: passes ``is_test=1`` and the convolutional
    prediction option. Forwards the module-level command-line ``args``.

    Args:
        model: model/workspace object the builder attaches ops to.
        loss_scale: loss scaling factor forwarded to the builder.

    Returns:
        Whatever ``model_builder.build_model`` returns.
    """
    return model_builder.build_model(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        batch_size=args.batch_size,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        # input_type == 1 selects optical-flow clip length, else RGB.
        clip_length=(args.clip_length_of
                     if args.input_type == 1 else args.clip_length_rgb),
        loss_scale=loss_scale,
        is_test=1,  # evaluation-mode graph
        multi_label=args.multi_label,
        channel_multiplier=args.channel_multiplier,
        bottleneck_multiplier=args.bottleneck_multiplier,
        use_dropout=args.use_dropout,
        use_convolutional_pred=args.use_convolutional_pred,
        use_pool1=args.use_pool1,
    )
def test(cfg):
    """Evaluate a video model on the test set.

    Loads weights either from an explicit checkpoint file
    (``cfg.TEST.CHECKPOINT_FILE_PATH``) or, failing that, from the last
    auto-resume checkpoint in ``cfg.OUTPUT_DIR``, then runs evaluation
    (optionally with test-time augmentation).

    Args:
        cfg (CfgNode): experiment configuration.
    """
    logging.setup_logging(logger, cfg)
    logger.info("Test with config")
    logger.info(pprint.pformat(cfg))
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        gs, checkpoint_epoch = cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=False)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        gs, checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                                  cfg.NUM_GPUS > 1, None)
        start_epoch = checkpoint_epoch + 1
    # NOTE(review): if neither branch matches, the model is evaluated with
    # random init — presumably the caller guarantees a checkpoint exists.
    # `gs`/`start_epoch` are computed but unused below.
    # Create the video test loader.
    test_loader = loader.construct_loader(cfg, "test")
    test_meter = TestMeter(cfg)
    if cfg.TEST.AUGMENT_TEST:
        evaluate_with_augmentation(test_loader, model, test_meter, cfg)
    else:
        evaluate(test_loader, model, test_meter, cfg)
def load_model():
    """Build the VISUAL-phase inference program and load its weights.

    Loads from ``cfg.TEST.TEST_MODEL``, preferring the unified
    ``fluid.load`` checkpoint format and falling back to legacy
    per-parameter files.

    Returns:
        tuple: (fetch_list, test_prog, exe) where fetch_list holds the
        prediction variable name to fetch at run time.
    """
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog,
                              phase=ModelPhase.VISUAL)
    fetch_list = [pred.name]
    # Clone forward graph for inference.
    test_prog = test_prog.clone(for_test=True)
    # Get device environment (single GPU).
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    ckpt_dir = cfg.TEST.TEST_MODEL
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
            # propagate. Fall back to the legacy parameter format.
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
    return fetch_list, test_prog, exe
def train(cfg):
    """Train a video model for many epochs on the train set and evaluate
    it on the val set.

    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Setup logging format.
    logging.setup_logging(logger, cfg)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)
    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)
    # Record global step.
    gs = 0
    # Load a checkpoint to resume training if applicable. Auto-resume from
    # OUTPUT_DIR takes precedence over an explicit checkpoint file.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        gs, checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                                  cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        if cfg.TRAIN.LOAD_PART_OF_CHECKPOINT:
            # Partial load (e.g. backbone only); optimizer state not restored.
            gs, checkpoint_epoch = cu.load_part_of_checkpoint(
                cfg.TRAIN.CHECKPOINT_FILE_PATH,
                model,
                cfg.NUM_GPUS > 1,
                optimizer=None)
        else:
            gs, checkpoint_epoch = cu.load_checkpoint(
                cfg.TRAIN.CHECKPOINT_FILE_PATH,
                model,
                cfg.NUM_GPUS > 1,
                optimizer=None,
                inflation=False,
                convert_from_caffe2=False)
        start_epoch = checkpoint_epoch + 1
    else:
        gs = 0
        start_epoch = 0
    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    # Create meters.
    train_meter = TrainMeter(len(train_loader), cfg)
    val_meter = ValMeter(cfg)
    # Perform the training loop.
    logger.info("Start epoch: {} gs {}".format(start_epoch + 1, gs + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Evaluate the model on validation set (before this epoch's
        # training, on eval epochs).
        if misc.is_eval_epoch(cfg, cur_epoch):
            if cfg.TRAIN.USE_CENTER_VALIDATION:
                validation_epoch_center(val_loader, model, val_meter,
                                        cur_epoch, cfg)
            else:
                validation_epoch(val_loader, model, val_meter, cur_epoch, cfg)
        # Train for one epoch.
        gs = train_epoch(train_loader, model, optimizer, train_meter,
                         cur_epoch, gs, cfg)
        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )
        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               gs, cfg)
def main():
    """Full train / validate / test / inference pipeline.

    Builds model, optimizer, scheduler and loss from the module-level
    ``cfg``; optionally resumes from a checkpoint; builds per-dataset
    loaders; then trains for ``cfg.train.max_epoch`` epochs with periodic
    evaluation and checkpointing, and finishes with final evaluation and
    inference passes.
    """
    # Set logger to record information.
    utils.check_env(cfg)
    logger = Logger(cfg)
    logger.log_info(cfg)
    metrics_handler = MetricsHandler(cfg.metrics)
    # utils.pack_code(cfg, logger=logger)
    # Build model.
    model = model_builder.build_model(cfg=cfg, logger=logger)
    optimizer = optimizer_helper.build_optimizer(cfg=cfg, model=model)
    lr_scheduler = lr_scheduler_helper.build_scheduler(cfg=cfg,
                                                       optimizer=optimizer)
    # Read checkpoint (only when resuming).
    ckpt = torch.load(cfg.model.path2ckpt) if cfg.gnrl.resume else {}
    if cfg.gnrl.resume:
        with logger.log_info(msg="Load pre-trained model.",
                             level="INFO",
                             state=True,
                             logger=logger):
            # Restore model, optimizer and scheduler state together.
            model.load_state_dict(ckpt["model"])
            optimizer.load_state_dict(ckpt["optimizer"])
            lr_scheduler.load_state_dict(ckpt["lr_scheduler"])
    # Set device. "PIPLINE" (sic — name comes from the config/utils API)
    # selects model-parallel pipeline placement; otherwise plain device.
    model, device = utils.set_pipline(
        model, cfg) if cfg.gnrl.PIPLINE else utils.set_device(
            model, cfg.gnrl.cuda)
    resume_epoch = ckpt["epoch"] if cfg.gnrl.resume else 0
    loss_fn = loss_fn_helper.build_loss_fn(cfg=cfg)
    # Prepare dataset: build loaders best-effort per split; a failed build
    # only logs a notification (the split is then absent from the dict).
    train_loaders, valid_loaders, test_loaders = dict(), dict(), dict()
    for dataset in cfg.data.datasets:
        if cfg.data[dataset].TRAIN:
            try:
                train_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "train")
            except:
                utils.notify(msg="Failed to build train loader of %s" %
                             dataset)
        if cfg.data[dataset].VALID:
            try:
                valid_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "valid")
            except:
                utils.notify(msg="Failed to build valid loader of %s" %
                             dataset)
        if cfg.data[dataset].TEST:
            try:
                test_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "test")
            except:
                utils.notify(msg="Failed to build test loader of %s" %
                             dataset)
    # TODO Train, evaluate model and save checkpoint.
    for epoch in range(cfg.train.max_epoch):
        # Epochs are 1-based; skip epochs already covered by the checkpoint.
        epoch += 1
        if resume_epoch >= epoch:
            continue
        eval_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model,
            "loss_fn": loss_fn,
            "device": device,
            "metrics_handler": metrics_handler,
            "logger": logger,
            "save": cfg.save.save,
        }
        train_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model,
            "loss_fn": loss_fn,
            "optimizer": optimizer,
            "device": device,
            "lr_scheduler": lr_scheduler,
            "metrics_handler": metrics_handler,
            "logger": logger,
        }
        ckpt_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model.state_dict(),
            "metrics_handler": metrics_handler,
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
        }
        # Train one epoch on every dataset that has a train split.
        for dataset in cfg.data.datasets:
            if cfg.data[dataset].TRAIN:
                utils.notify("Train on %s" % dataset)
                train_one_epoch(data_loader=train_loaders[dataset],
                                **train_kwargs)
        # Always overwrite the rolling checkpoint; additionally keep a
        # numbered snapshot at configured milestone epochs.
        utils.save_ckpt(path2file=cfg.model.path2ckpt, **ckpt_kwargs)
        if epoch in cfg.gnrl.ckphs:
            utils.save_ckpt(path2file=os.path.join(
                cfg.model.ckpts,
                cfg.gnrl.id + "_" + str(epoch).zfill(5) + ".pth"),
                            **ckpt_kwargs)
        # Per-epoch evaluation. NOTE(review): the notify is issued before
        # the split check, so it logs even for datasets without that split.
        for dataset in cfg.data.datasets:
            utils.notify("Evaluating test set of %s" % dataset, logger=logger)
            if cfg.data[dataset].TEST:
                evaluate(data_loader=test_loaders[dataset],
                         phase="test",
                         **eval_kwargs)
        for dataset in cfg.data.datasets:
            utils.notify("Evaluating valid set of %s" % dataset,
                         logger=logger)
            if cfg.data[dataset].VALID:
                evaluate(data_loader=valid_loaders[dataset],
                         phase="valid",
                         **eval_kwargs)
    # End of train-valid for loop.
    # Final evaluation + inference, reusing the last loop epoch value.
    eval_kwargs = {
        "epoch": epoch,
        "cfg": cfg,
        "model": model,
        "loss_fn": loss_fn,
        "device": device,
        "metrics_handler": metrics_handler,
        "logger": logger,
        "save": cfg.save.save,
    }
    for dataset in cfg.data.datasets:
        if cfg.data[dataset].VALID:
            utils.notify("Evaluating valid set of %s" % dataset,
                         logger=logger)
            evaluate(data_loader=valid_loaders[dataset],
                     phase="valid",
                     **eval_kwargs)
    for dataset in cfg.data.datasets:
        if cfg.data[dataset].TEST:
            utils.notify("Evaluating test set of %s" % dataset, logger=logger)
            evaluate(data_loader=test_loaders[dataset],
                     phase="test",
                     **eval_kwargs)
    # Optional inference passes over any splits listed in INFER.
    for dataset in cfg.data.datasets:
        if "train" in cfg.data[dataset].INFER:
            utils.notify("Inference on train set of %s" % dataset)
            inference(data_loader=train_loaders[dataset],
                      phase="infer_train",
                      **eval_kwargs)
        if "valid" in cfg.data[dataset].INFER:
            utils.notify("Inference on valid set of %s" % dataset)
            inference(data_loader=valid_loaders[dataset],
                      phase="infer_valid",
                      **eval_kwargs)
        if "test" in cfg.data[dataset].INFER:
            utils.notify("Inference on test set of %s" % dataset)
            inference(data_loader=test_loaders[dataset],
                      phase="infer_test",
                      **eval_kwargs)
    return None
def visualize(cfg1,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run VISUAL-phase inference and write colored/label prediction PNGs.

    Loads the model from ``cfg1.TEST.TEST_MODEL``, runs it over the
    visualization file list one image at a time, resizes predictions back
    to original image shape and saves them under ``vis_dir``; optionally
    logs prediction/original/label images to VisualDL.

    Args:
        cfg1: experiment config (note: the module-level ``cfg`` is also
            read for DATASET.DATA_DIR).
        vis_file_list: file list to visualize; defaults to
            cfg1.DATASET.VIS_FILE_LIST.
        use_gpu: run on CUDA device 0 when True, else CPU.
        vis_dir: output directory for result PNGs.
        ckpt_dir: NOTE(review) — this parameter is unconditionally
            overwritten by cfg1.TEST.TEST_MODEL below; confirm intended.
        log_writer: optional VisualDL LogWriter.
        local_test: stop after 5 images when True (smoke-test mode).
    """
    if vis_file_list is None:
        vis_file_list = cfg1.DATASET.VIS_FILE_LIST
    dataset = SegDataset(
        file_list=vis_file_list,
        mode=ModelPhase.VISUAL,
        data_dir=cfg.DATASET.DATA_DIR)
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog, startup_prog,
                                   phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)
    # Generator full colormap for maximum 256 classes
    color_map = get_color_map_list(256)
    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    ckpt_dir = cfg1.TEST.TEST_MODEL
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except:
            # Legacy per-parameter checkpoint fallback.
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
    save_dir = vis_dir
    makedirs(save_dir)
    fetch_list = [pred.name, out.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        # Network input spatial size (H, W) for this batch.
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, out, = exe.run(
            program=test_prog,
            feed={'image': imgs},
            fetch_list=fetch_list,
            return_numpy=True)
        out = np.array(out)
        # Class index per pixel from the raw output (channel-last argmax).
        pred = np.argmax(out, axis=3)
        pred = np.expand_dims(pred, -1)
        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze to an HxW uint8 label map.
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            # Resize back to network-input size if argmax changed the shape.
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(
                    res_map, pred_shape, interpolation=cv2.INTER_NEAREST)
            # Crop padding, then restore the original image resolution.
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(
                res_map, (org_shape[1], org_shape[0]),
                interpolation=cv2.INTER_NEAREST)
            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)
            # Saved as grayscale label map (palette application disabled).
            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            #pred_mask.putpalette(color_map)
            pred_mask.save(vis_fn)
            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))
            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calulate epoch from ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)
                pred_mask_np = np.array(pred_mask.convert("RGB"))
                log_writer.add_image("Predict/{}".format(img_name),
                                     pred_mask_np, epoch)
                # Original image
                # BGR->RGB
                img = cv2.imread(
                    os.path.join(cfg1.DATASET.DATA_DIR, img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(
                        grt.astype(np.uint8), mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    # NOTE(review): "256" is not a standard PIL mode name —
                    # Image.convert would raise; confirm this path is
                    # actually exercised / intended ("P" or "RGB" likely).
                    grt = np.array(grt_pil.convert("256"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)
            # If in local_test mode, only visualize 5 images just for
            # testing procedure
            if local_test and img_cnt >= 5:
                break
def train(cfg):
    """Train a PaddlePaddle segmentation model.

    Builds train/eval programs, wires a (possibly multi-process) batch
    generator, runs the epoch/step loop with optional debug metrics,
    VisualDL logging and profiling, and periodically saves checkpoints,
    evaluates and visualizes. Reads module-level ``args`` (CLI flags) in
    addition to ``cfg``.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    # Fixed seeds for continuous-evaluation (CE) reproducibility runs.
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True
    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Per-sample generator that groups samples into per-trainer slices.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of
        # samples in batch_data is less then cfg.BATCH_SIZE to avoid NCCL
        # hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # Get number of GPU (distributed trainer count wins over local devices).
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))
    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each
    # GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    # Eval graph is built so its program can be saved alongside checkpoints.
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
        exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1
    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog,
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)
        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)
    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    # Total step estimate used only for the ETA display.
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)
    avg_loss = 0.0
    best_mIoU = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").
            format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))
    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")
    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        # Inner loop runs until the reader raises EOFException (epoch end).
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1
                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()
                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1
                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()
                # NOTE : used for benchmark, profiler tools
                if args.is_profiler and epoch == 1 and step == args.log_steps:
                    profiler.start_profiler("All")
                elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                    profiler.stop_profiler("total", args.profiler_path)
                    return
            except fluid.core.EOFException:
                # End of epoch: reset reader and move to the next epoch.
                data_loader.reset()
                break
            except Exception as e:
                print(e)
        # Snapshot / evaluate / visualize on the master trainer only.
        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)
            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)
                # Track and keep the best checkpoint by mIoU.
                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".
                               format(
                                   ckpt_dir,
                                   os.path.join(cfg.TRAIN.MODEL_SAVE_DIR,
                                                'best_model'), mean_iou))
            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)
    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
def main(opt, device_id):
    """Single-device training entry point (OpenNMT-style).

    Post-processes the options for this device, optionally resumes from a
    checkpoint, builds fields/model/optimizer/trainer and runs training.

    Args:
        opt: parsed command-line options.
        device_id: index of the device this process trains on.
    """
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        # map_location keeps tensors on CPU regardless of where they were
        # saved.
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)
        # Load default opts values then overwrite it with opts from
        # the checkpoint. It's usefull in order to re-train a model
        # after adding a new option (not set in checkpoint)
        dummy_parser = configargparse.ArgumentParser()
        opts.model_opts(dummy_parser)
        default_opt = dummy_parser.parse_known_args([])[0]
        model_opt = default_opt
        model_opt.__dict__.update(checkpoint['opt'].__dict__)
    else:
        checkpoint = None
        model_opt = opt
    # Peek the first dataset to determine the data_type.
    # (All datasets have the same data_type).
    first_dataset = next(lazily_load_dataset("train", opt))
    data_type = first_dataset.data_type
    # Derive input size from the first training example's source tensor.
    model_opt.input_size = first_dataset.examples[0].src.size()[0]
    # Load fields generated from preprocess phase.
    fields = load_fields(first_dataset, opt, checkpoint)
    # Report src/tgt features.
    src_features, tgt_features = _collect_report_features(fields)
    for j, feat in enumerate(src_features):
        logger.info(' * src feature %d size = %d' %
                    (j, len(fields[feat].vocab)))
    for j, feat in enumerate(tgt_features):
        logger.info(' * tgt feature %d size = %d' %
                    (j, len(fields[feat].vocab)))
    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)
    # Build optimizer (restores its state from the checkpoint if any).
    optim = build_optim(model, opt, checkpoint)
    # Build model saver
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)
    trainer = build_trainer(opt,
                            device_id,
                            model,
                            fields,
                            optim,
                            data_type,
                            model_saver=model_saver)

    # Fresh lazily-loaded iterators each call (datasets are streamed).
    def train_iter_fct():
        return build_dataset_iter(
            lazily_load_dataset("train", opt), fields, opt)

    def valid_iter_fct():
        return build_dataset_iter(
            lazily_load_dataset("valid", opt), fields, opt, is_train=False)

    # Do training.
    if len(opt.gpu_ranks):
        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
    else:
        logger.info('Starting training on CPU, could be very slow')
    trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                  opt.valid_steps)
    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run LaneNet inference on a file list and save visualization images.

    For each input image this writes three PNGs under ``vis_dir/visual_results``
    (binary mask, post-processed lane overlay, instance mask) and also shows
    them interactively via matplotlib (``plt.show()`` blocks per image).

    Args:
        cfg: global config; supplies dataset paths and TEST.TEST_MODEL.
        vis_file_list: file list to visualize; defaults to cfg.DATASET.TEST_FILE_LIST.
        use_gpu: run inference on CUDA device 0 instead of CPU.
        vis_dir: root output directory.
        also_save_raw_results: creates ``vis_dir/raw_results``.
            NOTE(review): the directory is created but nothing is ever written
            to it in this function — confirm whether raw saving was dropped.
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        log_writer, local_test, **kwargs: accepted for interface parity with
            the other visualize() variants; unused here.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = LaneNetDataset(file_list=vis_file_list,
                             mode=ModelPhase.VISUAL,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog,
                              phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)

    postprocessor = lanenet_postprocess.LaneNetPostProcessor()
    for imgs, grts, grts_instance, img_names, valid_shapes, org_imgs in test_reader:
        segLogits, emLogits = exe.run(program=test_prog,
                                      feed={'image': imgs},
                                      fetch_list=fetch_list,
                                      return_numpy=True)
        num_imgs = segLogits.shape[0]

        for i in range(num_imgs):
            gt_image = org_imgs[i]
            binary_seg_image, instance_seg_image = segLogits[i].squeeze(
                -1), emLogits[i].transpose((1, 2, 0))

            postprocess_result = postprocessor.postprocess(
                binary_seg_result=binary_seg_image,
                instance_seg_result=instance_seg_image,
                source_image=gt_image)

            pred_binary_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_binary'))
            pred_lane_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_lane'))
            pred_instance_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_instance'))
            dirname = os.path.dirname(pred_binary_fn)
            makedirs(dirname)

            mask_image = postprocess_result['mask_image']
            # Fix: use a distinct index for the 4 embedding channels instead
            # of re-using (shadowing) the enclosing image index `i`.
            for c in range(4):
                instance_seg_image[:, :, c] = minmax_scale(
                    instance_seg_image[:, :, c])
            embedding_image = np.array(instance_seg_image).astype(np.uint8)

            plt.figure('mask_image')
            plt.imshow(mask_image[:, :, (2, 1, 0)])
            plt.figure('src_image')
            plt.imshow(gt_image[:, :, (2, 1, 0)])
            plt.figure('instance_image')
            plt.imshow(embedding_image[:, :, (2, 1, 0)])
            plt.figure('binary_image')
            plt.imshow(binary_seg_image * 255, cmap='gray')
            # NOTE(review): plt.show() blocks until the windows are closed,
            # so this loop is interactive by design — confirm for batch runs.
            plt.show()

            cv2.imwrite(pred_binary_fn,
                        np.array(binary_seg_image * 255).astype(np.uint8))
            cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
            cv2.imwrite(pred_instance_fn, mask_image)
            print(pred_lane_fn, 'saved!')
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="show",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Render segmentation predictions side-by-side with source images.

    For each input, blends the colorized prediction onto the original image
    and saves a composite PNG (original stacked with prediction) under the
    hard-coded "show" directory.

    Args:
        cfg: global config (dataset paths, TEST.TEST_MODEL).
        vis_file_list: file list to visualize; defaults to cfg.DATASET.VIS_FILE_LIST.
        use_gpu: run on CUDA device 0 instead of CPU.
        vis_dir: accepted but NOTE(review): not used — output goes to the
            literal "show" directory below; confirm intent.
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        log_writer, local_test, **kwargs: interface parity with sibling
            visualize() variants; unused here.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.VIS_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog, startup_prog,
                                   phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generator full colormap for maximum 256 classes
    color_map = get_color_map_list(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        # Prefer the newer fluid.load format; fall back to legacy params.
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = "show"
    makedirs(save_dir)
    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, logit = exe.run(program=test_prog,
                              feed={'image': imgs},
                              fetch_list=fetch_list,
                              return_numpy=True)
        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Undo padding/resizing: crop to the valid region, then scale
            # back to the original image shape.
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            pred_mask.putpalette(color_map)
            # pred_mask.save(vis_fn)
            pred_mask_np = np.array(pred_mask.convert("RGB"))
            im_pred = PILImage.fromarray(pred_mask_np)

            # Original image
            # BGR->RGB
            img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
                                          img_name))[..., ::-1]
            im_ori = PILImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # log_writer.add_image("Images/{}".format(img_name), img, epoch)
            # add ground truth (label) images
            # Composite: (original | original) stacked over (blend | prediction).
            im_pred_cat = PILImage.blend(im_ori, im_pred, 0.5)
            im_ori = join(im_ori, im_ori, flag="vertical")
            im_pred_cat = join(im_pred_cat, im_pred, flag="vertical")
            new_img = join(im_ori, im_pred_cat)
            new_img.save(vis_fn)

            img_cnt += 1
            print("#{} show image path: {}".format(img_cnt, vis_fn))
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    """Evaluate a segmentation model on the validation file list.

    Streams batches through the EVAL graph, accumulates a streaming
    confusion matrix, and prints per-step and final loss/acc/IoU/kappa.

    Args:
        cfg: global config (dataset, batch size, TEST.TEST_MODEL).
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        use_gpu: evaluate on CUDA places instead of CPU.
        use_mpio: use the multiprocess data generator.

    Returns:
        (category_iou, avg_iou, category_acc, avg_acc).

    Raises:
        ValueError: if the resolved checkpoint directory does not exist.
    """
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                         mode=ModelPhase.EVAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        #TODO: check is batch reader compatitable with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        # Yield (image, ground-truth, mask) triples.
        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError(
            'The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        load_model(exe, test_prog, ckpt_dir)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    # +1 because the last partial batch is not dropped (drop_last=False).
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(test_prog,
                                              fetch_list=fetch_list,
                                              return_numpy=True)
            loss = np.mean(np.array(loss))
            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()
            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            # Data loader exhausted: evaluation done.
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    """Evaluate a LaneNet model: running accuracy / false-positive / false-negative.

    Args:
        cfg: global config (dataset, batch size, TEST.TEST_MODEL).
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        use_gpu: evaluate on CUDA places instead of CPU.
        use_mpio: use the multiprocess data generator.

    Returns:
        (mean_accuracy, mean_fp, mean_fn), each averaged over all images.
    """
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    # NOTE(review): the eval dataset is built with mode=ModelPhase.TRAIN and
    # shuffle=True — confirm this is intentional (TRAIN mode may apply
    # training-time preprocessing during evaluation).
    dataset = LaneNetDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                             mode=ModelPhase.TRAIN,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        #TODO: check is batch reader compatitable with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        for b in data_gen:
            yield b

    data_loader, pred, grts, masks, accuracy, fp, fn = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    fetch_list = [
        pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name
    ]
    num_images = 0
    step = 0
    # Running sums weighted by batch size; divided by num_images when reported.
    avg_acc = 0.0
    avg_fp = 0.0
    avg_fn = 0.0
    # cur_images = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            pred, grts, masks, out_acc, out_fp, out_fn = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)

            avg_acc += np.mean(out_acc) * pred.shape[0]
            avg_fp += np.mean(out_fp) * pred.shape[0]
            avg_fn += np.mean(out_fn) * pred.shape[0]
            num_images += pred.shape[0]

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, avg_acc / num_images, avg_fp / num_images,
                        avg_fn / num_images, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            # Data loader exhausted: evaluation done.
            break

    print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format(
        num_images, avg_acc / num_images, avg_fp / num_images,
        avg_fn / num_images))

    return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images
def test_model():
    """Smoke test: build the model from cfg and run one dummy forward pass."""
    # from configs.configs import cfg
    model = model_builder.build_model(cfg=cfg, logger=None)
    dummy_batch = torch.randn((2, 6, 512, 512))
    out = model(dummy_batch)
    print(model)
def train(cfg):
    """Train a segmentation model with PaddlePaddle fluid.

    Builds the TRAIN graph, optionally resumes from a checkpoint or loads a
    pretrained model (skipping shape-mismatched parameters), then runs the
    epoch loop with periodic logging, snapshotting, evaluation, and
    TensorBoard visualization.

    Args:
        cfg: global config (dataset, solver, batch size, trainer topology).
            Also reads module-level `args` (use_gpu/use_mpio/debug/use_tb/...).

    Raises:
        AssertionError: if BATCH_SIZE is not divisible by the device count.
        ValueError: if the resume epoch exceeds cfg.SOLVER.NUM_EPOCHS.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yield samples in groups of BATCH_SIZE // NUM_TRAINERS so each
        # trainer sees an equal share of every global batch.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(data_generator,
                                        batch_size=batch_size_per_dev,
                                        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whehter persitable variable shape is match with current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        # Partition parameters into loadable vs. shape-mismatched/missing.
        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(exe,
                           dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                           vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)
        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()
            except fluid.core.EOFException:
                # End of epoch: reset the reader and move on.
                py_reader.reset()
                break
            except Exception as e:
                # NOTE(review): this swallows any per-step error and keeps
                # training — confirm this best-effort behavior is intended.
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')
def evaluate(cfg,
             ckpt_dir=None,
             use_gpu=False,
             vis=False,
             vis_dir='vis_out/test_public',
             use_mpio=False,
             **kwargs):
    """Evaluate a segmentation model, optionally dumping raw predictions.

    With ``vis=False`` this streams the validation set through the EVAL graph,
    accumulates a streaming confusion matrix, and returns the metrics. With
    ``vis=True`` it instead saves each prediction as a ``.npy`` file named
    after the corresponding entry of the VAL file list and returns None.

    Args:
        cfg: global config (dataset, batch size, TEST.TEST_MODEL).
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        use_gpu: evaluate on CUDA places instead of CPU.
        vis: save raw prediction arrays instead of computing metrics
            (requires cfg.VIS.VISINEVAL).
        vis_dir: output directory for the ``.npy`` dumps.
        use_mpio: use the multiprocess data generator.

    Returns:
        (category_iou, avg_iou, category_acc, avg_acc), or None when vis=True.

    Raises:
        ValueError: if the resolved checkpoint directory does not exist.
    """
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    # First column of each file-list line is the image path; used to name
    # the saved .npy predictions in vis mode.
    fls = []
    with open(cfg.DATASET.VAL_FILE_LIST) as fr:
        for line in fr.readlines():
            fls.append(line.strip().split(' ')[0])
    if vis:
        assert cfg.VIS.VISINEVAL is True
        if not os.path.exists(vis_dir):
            os.makedirs(vis_dir)

    def data_generator():
        #TODO: check is batch reader compatitable with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()
        for b in data_gen:
            if cfg.DATASET.INPUT_IMAGE_NUM == 1:
                yield b[0], b[1], b[2]
            else:
                yield b[0], b[1], b[2], b[3]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(
        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        # Prefer the newer fluid.load format; fall back to legacy params.
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    class_num = cfg.DATASET.NUM_CLASSES
    conf_mat = ConfusionMatrix(class_num, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    cnt = 0
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)
            if vis:
                preds = np.array(pred, dtype=np.float32)
                for j in range(preds.shape[0]):
                    # Fix: guard with >= so cnt == len(fls) cannot index past
                    # the end of the file list (previously `>` allowed an
                    # IndexError on the last sample).
                    if cnt >= len(fls):
                        continue
                    name = fls[cnt].split('/')[-1].split('.')[0]
                    p = np.squeeze(preds[j])
                    np.save(os.path.join(vis_dir, name + '.npy'), p)
                    cnt += 1
                print('vis %d npy... (%d tif sample)' % (cnt, cnt // 36))
                continue

            loss = np.mean(np.array(loss))
            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()
            fwiou = conf_mat.frequency_weighted_iou()
            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} FWIoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, fwiou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    if vis:
        return

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    fwiou = conf_mat.frequency_weighted_iou()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f} FWIoU={:.4f}".format(
        num_images, avg_acc, avg_iou, fwiou))
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Kappa: {:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
def main():
    """Train/validate/test driver: build everything from cfg, then loop epochs.

    Each epoch: train, checkpoint, validate, checkpoint again, then evaluate
    the test set (falling back to generating output images when the test set
    has no targets). For the NTIRE2021 dual-pixel dataset, valid-set outputs
    are also generated each epoch.
    """
    # Set logger to record information.
    logger = Logger(cfg)
    logger.log_info(cfg)
    metrics_logger = Metrics()
    utils.pack_code(cfg, logger=logger)
    # Build model.
    model = model_builder.build_model(cfg=cfg, logger=logger)
    # Read checkpoint. Load to CPU first so resuming does not require the GPU
    # the checkpoint was saved from (consistent with the inference-only main).
    ckpt = torch.load(
        cfg.MODEL.PATH2CKPT,
        map_location=torch.device("cpu")) if cfg.GENERAL.RESUME else {}
    if cfg.GENERAL.RESUME:
        model.load_state_dict(ckpt["model"])
    resume_epoch = ckpt["epoch"] if cfg.GENERAL.RESUME else 0
    optimizer = ckpt[
        "optimizer"] if cfg.GENERAL.RESUME else optimizer_helper.build_optimizer(
            cfg=cfg, model=model)
    # lr_scheduler = ckpt["lr_scheduler"] if cfg.GENERAL.RESUME else lr_scheduler_helper.build_scheduler(cfg=cfg, optimizer=optimizer)
    # The scheduler is always rebuilt, then fast-forwarded to the resume epoch.
    lr_scheduler = lr_scheduler_helper.build_scheduler(cfg=cfg,
                                                       optimizer=optimizer)
    lr_scheduler.sychronize(resume_epoch)
    loss_fn = ckpt[
        "loss_fn"] if cfg.GENERAL.RESUME else loss_fn_helper.build_loss_fn(
            cfg=cfg)
    # Set device.
    model, device = utils.set_device(model, cfg.GENERAL.GPU)
    # Prepare dataset. Failures are logged and tolerated; a missing loader
    # only matters if the corresponding phase is later attempted.
    if cfg.GENERAL.TRAIN:
        try:
            train_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "train")
        except Exception:
            logger.log_info("Cannot build train dataset.")
    if cfg.GENERAL.VALID:
        try:
            valid_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "valid")
        except Exception:
            logger.log_info("Cannot build valid dataset.")
    if cfg.GENERAL.TEST:
        try:
            test_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "test")
        except Exception:
            logger.log_info("Cannot build test dataset.")
    # Train, evaluate model and save checkpoint.
    for epoch in range(cfg.TRAIN.MAX_EPOCH):
        # NOTE(review): `>=` also skips epoch 0 when not resuming
        # (resume_epoch == 0) — confirm epochs are meant to start at 1.
        if resume_epoch >= epoch:
            continue
        try:
            train_one_epoch(
                epoch=epoch,
                cfg=cfg,
                model=model,
                data_loader=train_data_loader,
                device=device,
                loss_fn=loss_fn,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,
                metrics_logger=metrics_logger,
                logger=logger,
            )
        except Exception:
            logger.log_info("Failed to train model.")
        optimizer.zero_grad()
        # Checkpoint after training, before validation.
        with torch.no_grad():
            utils.save_ckpt(
                path2file=os.path.join(
                    cfg.MODEL.CKPT_DIR,
                    cfg.GENERAL.ID + "_" + str(epoch).zfill(3) + ".pth"),
                logger=logger,
                model=model.state_dict(),
                epoch=epoch,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,  # NOTE Need attribdict>=0.0.5
                loss_fn=loss_fn,
                metrics=metrics_logger,
            )
        try:
            evaluate(
                epoch=epoch,
                cfg=cfg,
                model=model,
                data_loader=valid_data_loader,
                device=device,
                loss_fn=loss_fn,
                metrics_logger=metrics_logger,
                phase="valid",
                logger=logger,
                save=cfg.SAVE.SAVE,
            )
        except Exception:
            logger.log_info("Failed to evaluate model.")
        # Checkpoint again so validation metrics are captured.
        with torch.no_grad():
            utils.save_ckpt(
                path2file=os.path.join(
                    cfg.MODEL.CKPT_DIR,
                    cfg.GENERAL.ID + "_" + str(epoch).zfill(3) + ".pth"),
                logger=logger,
                model=model.state_dict(),
                epoch=epoch,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,  # NOTE Need attribdict>=0.0.5
                loss_fn=loss_fn,
                metrics=metrics_logger,
            )
        # If test set has target images, evaluate and save them, otherwise
        # just try to generate output images.
        if cfg.DATA.DATASET == "DualPixelNTIRE2021":
            try:
                generate(
                    cfg=cfg,
                    model=model,
                    data_loader=valid_data_loader,
                    device=device,
                    phase="valid",
                    logger=logger,
                )
            except Exception:
                logger.log_info(
                    "Failed to generate output images of valid set of NTIRE2021.")
        try:
            evaluate(
                epoch=epoch,
                cfg=cfg,
                model=model,
                data_loader=test_data_loader,
                device=device,
                loss_fn=loss_fn,
                metrics_logger=metrics_logger,
                phase="test",
                logger=logger,
                save=True,
            )
        except Exception:
            logger.log_info("Failed to test model, try to generate images.")
            try:
                generate(
                    cfg=cfg,
                    model=model,
                    data_loader=test_data_loader,
                    device=device,
                    phase="test",
                    logger=logger,
                )
            except Exception:
                logger.log_info("Cannot generate output images of test set.")
    return None
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py

    Reads ``vid_list.csv`` under cfg.DATA.PATH_TO_DATA_DIR and, for every
    video not yet processed, extracts features with multi_view_test() and
    saves them as ``<vid>_<NUM_FRAMES>.npy`` under cfg.OUTPUT_DIR.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    vid_root = cfg.DATA.PATH_TO_DATA_DIR
    videos_list_file = os.path.join(vid_root, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted(
            [x.strip() for x in f.readlines() if len(x.strip()) > 0])
    print("Done")
    print("----------------------------------------------------------")
    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid in videos:
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]
        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES)
        # Skip videos whose features were already extracted (resume support).
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{} already exists".format(out_file))
            continue
        print("Processing {}...".format(vid))

        dataset = VideoSet(cfg, path_to_vid, vid_id)
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )

        # Perform multi-view test on the entire dataset.
        feat_arr = multi_view_test(test_loader, model, cfg)

        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
    print("----------------------------------------------------------")
    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
        hours, minutes, seconds))
    print("----------------------------------------------------------")
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Save colorized segmentation predictions (and optionally raw masks).

    Writes colorized predictions under ``vis_dir/visual_results`` and, when
    ``also_save_raw_results`` is set, the raw label maps under
    ``vis_dir/raw_results``. When a TensorBoard ``log_writer`` is given,
    prediction/image/label triplets are also logged, with the epoch parsed
    from the checkpoint directory name.

    Args:
        cfg: global config (dataset paths, TEST.TEST_MODEL).
        vis_file_list: file list to visualize; defaults to cfg.DATASET.TEST_FILE_LIST.
        use_gpu: run on CUDA device 0 instead of CPU.
        vis_dir: root output directory.
        also_save_raw_results: additionally save un-colorized label maps.
        ckpt_dir: checkpoint directory; defaults to cfg.TEST.TEST_MODEL.
        log_writer: optional TensorBoard SummaryWriter.
        local_test: stop after 5 images (quick smoke test).
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog,
                              phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generator full colormap for maximum 256 classes
    color_map = get_color_map(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, = exe.run(program=test_prog,
                        feed={'image': imgs},
                        fetch_list=fetch_list,
                        return_numpy=True)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Undo padding/resizing: crop to valid region, then restore the
            # original image shape.
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            grt = grts[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            if grt is not None:
                grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                grt = cv2.resize(grt, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_names[i])
            if also_save_raw_results:
                raw_fn = os.path.join(raw_save_dir, png_fn)
                # Fix: create the parent directory of the output FILE
                # (previously os.path.dirname(raw_save_dir) created the parent
                # of the save dir, so nested png_fn paths failed to write).
                dirname = os.path.dirname(raw_fn)
                makedirs(dirname)
                cv2.imwrite(raw_fn, res_map)

            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = colorize(res_map, org_shapes[i], color_map)
            if grt is not None:
                grt = colorize(grt, org_shapes[i], color_map)
            cv2.imwrite(vis_fn, pred_mask)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use Tensorboard to visualize image
            if log_writer is not None:
                # Calulate epoch from ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("Tensorboard visualization epoch", epoch)
                log_writer.add_image("Predict/{}".format(img_names[i]),
                                     pred_mask[..., ::-1],
                                     epoch,
                                     dataformats='HWC')
                # Original image
                # BGR->RGB
                img = cv2.imread(
                    os.path.join(cfg.DATASET.DATA_DIR,
                                 img_names[i]))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_names[i]),
                                     img,
                                     epoch,
                                     dataformats='HWC')
                #add ground truth (label) images
                if grt is not None:
                    log_writer.add_image("Label/{}".format(img_names[i]),
                                         grt[..., ::-1],
                                         epoch,
                                         dataformats='HWC')

            # If in local_test mode, only visualize 5 images just for testing
            # procedure
            if local_test and img_cnt >= 5:
                break
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run inference and save palette-mode prediction PNGs (or raw .npy).

    Supports one- and two-image (change-detection) inputs. For the
    two-image CD mode (``cfg.VIS.SEG_FOR_CD``), the batch prediction is
    split in half and a per-pixel class-pair "diff" map is appended to the
    saved image. Optionally mirrors results to VisualDL.

    Args:
        cfg: Global config (DATASET / TEST / VIS sections are read).
        vis_file_list: File list to visualize; defaults to
            ``cfg.DATASET.TEST_FILE_LIST``.
        use_gpu: Run inference on ``CUDAPlace(0)`` instead of CPU.
        vis_dir: Output directory for the visualization files.
        ckpt_dir: Checkpoint directory; defaults to ``cfg.TEST.TEST_MODEL``.
            Its basename must be an integer epoch when ``log_writer`` is used.
        log_writer: Optional VisualDL writer.
        local_test: If True, stop after 5 images (quick smoke test).
        **kwargs: Unused; accepted for call-site compatibility.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(
        file_list=vis_file_list,
        mode=ModelPhase.VISUAL,
        data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph (inference-only copy, no backward ops)
    test_prog = test_prog.clone(for_test=True)

    # Palette sized for NUM_CLASSES**2 entries (the CD diff map encodes a
    # class *pair* per pixel), capped at the 256-entry PNG palette limit.
    color_map = get_color_map_list(cfg.DATASET.NUM_CLASSES**2
                                   if cfg.DATASET.NUM_CLASSES**2 < 256 else
                                   256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        # Prefer the new-style single-file checkpoint; deliberately fall
        # back to the legacy per-parameter format on failure.
        # FIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; narrowed to Exception while
        # preserving the fall-back behavior.
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = vis_dir
    makedirs(save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0

    def exe_run():
        # Unify the 1-input and 2-input reader layouts into one stream of
        # (pred, pred_shape, grts, img_names, valid_shapes, org_shapes).
        if cfg.DATASET.INPUT_IMAGE_NUM == 1:
            for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
                pred_shape = (imgs.shape[2], imgs.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={'image1': imgs},
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img_names, valid_shapes, org_shapes
        else:
            for img1s, img2s, grts, img1_names, img2_names, valid_shapes, \
                    org_shapes in test_reader:
                pred_shape = (img1s.shape[2], img1s.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={
                                    'image1': img1s,
                                    'image2': img2s
                                },
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img1_names, valid_shapes, org_shapes

    for pred, pred_shape, grts, img_names, valid_shapes, org_shapes in exe_run():
        # In CD mode the batch stacks both input images' predictions;
        # split it in half. Otherwise pred2 is an empty slice.
        idx = pred.shape[0]
        if cfg.DATASET.INPUT_IMAGE_NUM == 2 and cfg.VIS.SEG_FOR_CD:
            idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM
        pred1, pred2 = pred[:idx], pred[idx:]
        num_imgs = pred1.shape[0]

        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Post-process each available prediction: resize to network
            # input size if needed, crop padding, restore original size.
            res_map_list = []
            for pred_part in [pred1, pred2]:
                if pred_part.shape[0] == 0:
                    continue
                res_map = np.squeeze(pred_part[i, :, :, :]).astype(np.float32)
                res_shape = (res_map.shape[0], res_map.shape[1])
                if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                    # NOTE(review): cv2.resize expects (width, height) but
                    # pred_shape is (H, W); transposed for non-square inputs —
                    # kept as-is (matches the sibling visualize()); confirm
                    # against the data pipeline before changing.
                    res_map = cv2.resize(
                        res_map, pred_shape, interpolation=cv2.INTER_NEAREST)
                valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
                res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
                org_shape = (org_shapes[i, 0], org_shapes[i, 1])
                res_map = cv2.resize(
                    res_map, (org_shape[1], org_shape[0]),
                    interpolation=cv2.INTER_NEAREST)
                res_map_list.append(res_map)

            img_name = img_names[i]
            png_fn = to_png_fn(img_name)

            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = None  # set only when a palette PNG is produced
            if cfg.DATASET.INPUT_IMAGE_NUM == 1 or \
                    (cfg.DATASET.INPUT_IMAGE_NUM == 2
                     and not cfg.VIS.SEG_FOR_CD):
                res_map = res_map_list[0]
                if cfg.VIS.RAW_PRED:
                    # Save the raw float prediction instead of a PNG.
                    np.save(
                        vis_fn.replace(".png", ".npy"),
                        res_map.astype(np.float32))
                else:
                    if cfg.VIS.ADD_LABEL:
                        # Append the ground-truth label side by side.
                        grt_im = cv2.resize(
                            grts[i],
                            pred_shape,
                            interpolation=cv2.INTER_NEAREST)
                        res_map = np.hstack((res_map, grt_im))
                    pred_mask = PILImage.fromarray(
                        res_map.astype(np.uint8), mode='P')
                    pred_mask.putpalette(color_map)
                    pred_mask.save(vis_fn)
            else:
                # Change detection: encode the (class1, class2) pair per
                # pixel; zero out unchanged pixels; save [img1 | img2 | diff].
                res_map1, res_map2 = res_map_list
                diff = res_map1 * cfg.DATASET.NUM_CLASSES + res_map2
                unchange_idx = np.where((res_map1 - res_map2) == 0)
                diff[unchange_idx] = 0
                res_map = np.hstack((res_map1, res_map2, diff))
                pred_mask = PILImage.fromarray(
                    res_map.astype(np.uint8), mode='P')
                pred_mask.putpalette(color_map)
                pred_mask.save(vis_fn)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calculate epoch from ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)
                # FIX: when VIS.RAW_PRED is set no PNG is produced, so
                # `pred_mask` used to be unbound (NameError) or stale from a
                # previous iteration here; skip the Predict image instead.
                if pred_mask is not None:
                    pred_mask_np = np.array(pred_mask.convert("RGB"))
                    log_writer.add_image("Predict/{}".format(img_name),
                                         pred_mask_np, epoch)
                # Original image, BGR->RGB
                img = cv2.imread(
                    os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # Add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    grt = np.array(grt_pil.convert("RGB"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)

            # If in local_test mode, only visualize 5 images just for
            # testing procedure
            if local_test and img_cnt >= 5:
                break