import os
import datetime

import numpy as np
import torch
from torch.nn.parallel import DistributedDataParallel as DDP
from tqdm import tqdm

# Project-level helpers (build_dataloader, build_model/build_recognizer,
# build_criterion/build_optimizer/build_lr_scheduler, CheckPointer, do_train,
# logging and distributed utilities, and the module-level `logger`) are
# assumed to be imported from elsewhere in the repo.


def inference(cfg, model, device, **kwargs):
    cur_epoch = kwargs.get('cur_epoch', None)
    dataset_name = cfg.DATASETS.TEST.NAME
    num_gpus = cfg.NUM_GPUS

    data_loader = build_dataloader(cfg, is_train=False)
    dataset = data_loader.dataset
    evaluator = dataset.evaluator
    # Reset any accumulated statistics before a fresh evaluation run.
    evaluator.clean()

    logger.info("Evaluating {} dataset({} video clips):".format(dataset_name, len(dataset)))
    # Only the master process draws a progress bar; other workers iterate silently.
    if is_master_proc():
        for images, targets in tqdm(data_loader):
            compute_on_dataset(images, targets, device, model, num_gpus, evaluator)
    else:
        for images, targets in data_loader:
            compute_on_dataset(images, targets, device, model, num_gpus, evaluator)

    result_str, acc_dict = evaluator.get()
    logger.info(result_str)

    if is_master_proc():
        output_dir = cfg.OUTPUT_DIR
        # Timestamped file for standalone runs, epoch-numbered file during training.
        result_path = os.path.join(
            output_dir,
            'result_{}.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) \
            if cur_epoch is None else \
            os.path.join(output_dir, 'result_{:04d}.txt'.format(cur_epoch))
        with open(result_path, "w") as f:
            f.write(result_str)

    return acc_dict
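
# compute_on_dataset() is defined elsewhere in the repo. A minimal sketch of
# what the call above is assumed to do: move the batch to the device, run a
# no-grad forward pass, and feed predictions to the evaluator. The all_gather
# helper and the evaluator.evaluate() method name are assumptions, not the
# repo's confirmed API.
@torch.no_grad()
def compute_on_dataset(images, targets, device, model, num_gpus, evaluator):
    images = images.to(device=device, non_blocking=True)
    targets = targets.to(device=device, non_blocking=True)

    outputs = model(images)

    if num_gpus > 1:
        # With multiple processes, gather predictions and labels so the
        # evaluator sees the full batch (hypothetical gather utility).
        outputs, targets = all_gather([outputs, targets])

    evaluator.evaluate(outputs, targets)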
def inference(rgb_cfg, rgb_model, rgbdiff_cfg, rgbdiff_model, device):
    logger_name = rgb_cfg.INFER.NAME
    dataset_name = rgb_cfg.DATASETS.TEST.NAME
    output_dir = rgb_cfg.OUTPUT.DIR

    rgb_data_loader = build_dataloader(rgb_cfg, train=False)
    rgbdiff_data_loader = build_dataloader(rgbdiff_cfg, train=False)
    dataset = rgb_data_loader.dataset

    logger = setup_logger(logger_name)
    logger.info("Evaluating {} dataset({} video clips):".format(dataset_name, len(dataset)))

    results_dict, cate_acc_dict, acc_top1, acc_top5 = compute_on_dataset(
        rgb_model, rgb_data_loader, rgbdiff_model, rgbdiff_data_loader, device)

    top1_acc = np.mean(acc_top1)
    top5_acc = np.mean(acc_top5)
    result_str = '\ntotal - top_1 acc: {:.3f}, top_5 acc: {:.3f}\n'.format(top1_acc, top5_acc)

    # Per-category accuracy: results_dict holds sample counts and
    # cate_acc_dict holds correct-prediction counts, keyed by class index.
    classes = dataset.classes
    for key in sorted(results_dict.keys(), key=lambda x: int(x)):
        total_num = results_dict[key]
        acc_num = cate_acc_dict[key]
        cate_name = classes[int(key)]
        if total_num != 0:
            result_str += '{:<3} - {:<20} - acc: {:.2f}\n'.format(
                key, cate_name, acc_num / total_num * 100)
        else:
            # No samples in this category; report 0.0 instead of dividing by zero.
            result_str += '{:<3} - {:<20} - acc: 0.0\n'.format(key, cate_name)
    logger.info(result_str)

    result_path = os.path.join(
        output_dir,
        'result_{}.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
    with open(result_path, "w") as f:
        f.write(result_str)

    # Detach handlers via a copy: removing from logger.handlers while
    # iterating over it directly would skip entries.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    return {'top1': top1_acc, 'top5': top5_acc}
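
# The two-stream compute_on_dataset() above returns per-category counts plus
# top-1/top-5 hits. The fusion rule is assumed to be equal-weight averaging of
# the two streams' softmax scores; a minimal sketch under that assumption
# (fuse_two_stream is a hypothetical helper, not a repo function):
@torch.no_grad()
def fuse_two_stream(rgb_model, rgb_images, rgbdiff_model, rgbdiff_images, device):
    rgb_probs = torch.softmax(rgb_model(rgb_images.to(device)), dim=1)
    rgbdiff_probs = torch.softmax(rgbdiff_model(rgbdiff_images.to(device)), dim=1)
    # Equal-weight late fusion; the per-stream weights are a tunable choice.
    return (rgb_probs + rgbdiff_probs) / 2.0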
def train(cfg):
    # Set up environment.
    init_distributed_training(cfg)
    local_rank_id = get_local_rank()

    # Set random seed from configs, offset per rank so each process
    # shuffles and augments differently.
    np.random.seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.manual_seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up logging format (repo-local logging module, not the stdlib).
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info('init start')

    # Epoch numbering starts from 1.
    arguments = {"cur_epoch": 1}

    device = get_device(local_rank_id)
    model = build_recognizer(cfg, device)
    criterion = build_criterion(cfg, device)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT_DIR, save_to_disk=True)

    if cfg.TRAIN.RESUME:
        logger.info('resume start')
        extra_checkpoint_data = checkpointer.load(map_location=device)
        if isinstance(extra_checkpoint_data, dict):
            arguments['cur_epoch'] = extra_checkpoint_data['cur_epoch']
        if cfg.LR_SCHEDULER.IS_WARMUP:
            logger.info('warmup start')
            # Rebind the reloaded optimizer to the warmup scheduler so both
            # reference the same parameter groups after resuming.
            if lr_scheduler.finished:
                optimizer.load_state_dict(lr_scheduler.after_scheduler.optimizer.state_dict())
            else:
                optimizer.load_state_dict(lr_scheduler.optimizer.state_dict())
            lr_scheduler.optimizer = optimizer
            lr_scheduler.after_scheduler.optimizer = optimizer
            logger.info('warmup end')
        logger.info('resume end')

    data_loader = build_dataloader(cfg, is_train=True)

    logger.info('init end')
    synchronize()
    do_train(cfg, arguments, data_loader, model, criterion, optimizer,
             lr_scheduler, checkpointer, device)
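
# do_train() is implemented elsewhere; a minimal sketch of the epoch loop the
# call above is assumed to drive. The name do_train_sketch, cfg.TRAIN.MAX_EPOCH,
# and the checkpoint naming are illustrative assumptions, not the repo's API.
def do_train_sketch(cfg, arguments, data_loader, model, criterion, optimizer,
                    lr_scheduler, checkpointer, device):
    model.train()
    for cur_epoch in range(arguments['cur_epoch'], cfg.TRAIN.MAX_EPOCH + 1):
        for images, targets in data_loader:
            outputs = model(images.to(device))
            loss = criterion(outputs, targets.to(device))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        lr_scheduler.step()
        # Persist weights plus the resume metadata used by train() above.
        checkpointer.save('model_{:04d}'.format(cur_epoch), cur_epoch=cur_epoch + 1)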
def train(gpu, args, cfg):
    # Global rank = node index * GPUs per node + local GPU index.
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size)
    logger = setup_logger(cfg.TRAIN.NAME)

    arguments = {"iteration": 0}

    torch.cuda.set_device(gpu)
    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    # Remap checkpoints saved from GPU 0 onto this process's GPU.
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}

    model = build_model(cfg, gpu, map_location=map_location)
    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT.DIR, save_to_disk=True, logger=logger)

    if args.resume:
        if is_master_proc():
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location, rank=rank)
        if extra_checkpoint_data != dict():
            arguments['iteration'] = extra_checkpoint_data['iteration']
        if cfg.LR_SCHEDULER.IS_WARMUP:
            if is_master_proc():
                logger.info('warmup ...')
            # Rebind the reloaded optimizer to the warmup scheduler so both
            # reference the same parameter groups after resuming.
            if lr_scheduler.finished:
                optimizer.load_state_dict(lr_scheduler.after_scheduler.optimizer.state_dict())
            else:
                optimizer.load_state_dict(lr_scheduler.optimizer.state_dict())
            lr_scheduler.optimizer = optimizer
            lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg, is_train=True, start_iter=arguments['iteration'])

    synchronize()
    do_train(args, cfg, arguments, data_loader, model, criterion, optimizer,
             lr_scheduler, checkpointer, device, logger)
    cleanup()
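
# setup()/cleanup() are assumed to be the usual torch.distributed
# process-group helpers; a minimal sketch (the env:// rendezvous via
# MASTER_ADDR/MASTER_PORT defaults is an assumption about how the repo
# launches its workers):
import torch.distributed as dist

def setup(rank, world_size):
    os.environ.setdefault('MASTER_ADDR', 'localhost')
    os.environ.setdefault('MASTER_PORT', '12355')
    # NCCL is the standard backend for multi-GPU training on Linux.
    dist.init_process_group('nccl', rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()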
def train(gpu, args, cfg):
    # Global rank = node index * GPUs per node + local GPU index.
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size, args.gpus)
    logger = setup_logger(cfg.TRAIN.NAME)

    arguments = {"iteration": 0}
    arguments['rank'] = rank

    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    # Remap checkpoints saved from GPU 0 onto this process's GPU.
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}

    model = build_model(cfg, map_location=map_location).to(device)
    if cfg.MODEL.PRETRAINED != "":
        if rank == 0 and logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        # Load-only checkpointer for the pretrained weights; it is rebuilt
        # below with optimizer/scheduler state for saving during training.
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED, map_location=map_location, rank=rank)
    if args.gpus > 1:
        model = DDP(model, device_ids=[gpu], find_unused_parameters=True)

    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)
    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT.DIR, save_to_disk=True, logger=logger)

    if args.resume:
        if rank == 0:
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location, rank=rank)
        if extra_checkpoint_data != dict():
            arguments['iteration'] = extra_checkpoint_data['iteration']
        if cfg.LR_SCHEDULER.WARMUP:
            if rank == 0:
                logger.info('warmup ...')
            # Rebind the reloaded optimizer to the warmup scheduler so both
            # reference the same parameter groups after resuming.
            if lr_scheduler.finished:
                optimizer.load_state_dict(lr_scheduler.after_scheduler.optimizer.state_dict())
            else:
                optimizer.load_state_dict(lr_scheduler.optimizer.state_dict())
            lr_scheduler.optimizer = optimizer
            lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg, train=True, start_iter=arguments['iteration'],
                                   world_size=args.world_size, rank=rank)

    model = do_train(args, cfg, arguments, data_loader, model, criterion, optimizer,
                     lr_scheduler, checkpointer, device, logger)

    if rank == 0 and not args.stop_eval:
        logger.info('Start final evaluating...')
        # Free cached memory to speed up evaluation after training finishes.
        torch.cuda.empty_cache()
        do_evaluation(cfg, model, device)
    cleanup()
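
# Typical entry point for the per-GPU train() worker above. parse_args() and
# get_cfg() stand in for the repo's CLI/config loaders and are assumptions;
# mp.spawn passes the process index as the first argument (gpu).
import torch.multiprocessing as mp

if __name__ == '__main__':
    args = parse_args()                       # hypothetical CLI parser
    args.world_size = args.gpus * args.nodes  # total processes across all nodes
    cfg = get_cfg(args)                       # hypothetical config loader
    mp.spawn(train, args=(args, cfg), nprocs=args.gpus)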