Example #1
def inference(cfg, model, device, **kwargs):
    cur_epoch = kwargs.get('cur_epoch', None)
    dataset_name = cfg.DATASETS.TEST.NAME
    num_gpus = cfg.NUM_GPUS

    data_loader = build_dataloader(cfg, is_train=False)
    dataset = data_loader.dataset
    evaluator = dataset.evaluator
    evaluator.clean()

    logger.info("Evaluating {} dataset({} video clips):".format(dataset_name, len(dataset)))

    # only the master process wraps the loader with a tqdm progress bar
    progress_bar = tqdm(data_loader) if is_master_proc() else data_loader
    for images, targets in progress_bar:
        compute_on_dataset(images, targets, device, model, num_gpus, evaluator)

    result_str, acc_dict = evaluator.get()
    logger.info(result_str)

    if is_master_proc():
        output_dir = cfg.OUTPUT_DIR
        result_path = os.path.join(output_dir,
                                   'result_{}.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) \
            if cur_epoch is None else os.path.join(output_dir, 'result_{:04d}.txt'.format(cur_epoch))

        with open(result_path, "w") as f:
            f.write(result_str)

    return acc_dict
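
A minimal sketch of how this inference entry point might be wired into an evaluation call; build_recognizer, get_device and get_local_rank are assumptions borrowed from Example #3, not part of this snippet:

import torch

# Hypothetical caller: build the model, switch to eval mode and run inference
# without gradient tracking. cur_epoch is forwarded so the result file is
# named result_{epoch:04d}.txt as in the snippet above.
def evaluate(cfg, cur_epoch=None):
    device = get_device(get_local_rank())   # assumed project helper
    model = build_recognizer(cfg, device)   # assumed project helper
    model.eval()
    with torch.no_grad():
        acc_dict = inference(cfg, model, device, cur_epoch=cur_epoch)
    return acc_dict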
Example #2
def inference(rgb_cfg, rgb_model, rgbdiff_cfg, rgbdiff_model, device):
    logger_name = rgb_cfg.INFER.NAME
    dataset_name = rgb_cfg.DATASETS.TEST.NAME
    output_dir = rgb_cfg.OUTPUT.DIR

    rgb_data_loader = build_dataloader(rgb_cfg, train=False)
    rgbdiff_data_loader = build_dataloader(rgbdiff_cfg, train=False)
    dataset = rgb_data_loader.dataset

    logger = setup_logger(logger_name)
    logger.info("Evaluating {} dataset({} video clips):".format(
        dataset_name, len(dataset)))

    results_dict, cate_acc_dict, acc_top1, acc_top5 = \
        compute_on_dataset(rgb_model, rgb_data_loader, rgbdiff_model, rgbdiff_data_loader, device)

    top1_acc = np.mean(acc_top1)
    top5_acc = np.mean(acc_top5)
    result_str = '\ntotal - top_1 acc: {:.3f}, top_5 acc: {:.3f}\n'.format(
        top1_acc, top5_acc)

    classes = dataset.classes
    for key in sorted(results_dict.keys(), key=lambda x: int(x)):
        total_num = results_dict[key]
        acc_num = cate_acc_dict[key]

        cate_name = classes[int(key)]

        if total_num != 0:
            result_str += '{:<3} - {:<20} - acc: {:.2f}\n'.format(
                key, cate_name, acc_num / total_num * 100)
        else:
            # avoid a ZeroDivisionError when this category has no test clips
            result_str += '{:<3} - {:<20} - acc: 0.0\n'.format(
                key, cate_name)
    logger.info(result_str)

    result_path = os.path.join(
        output_dir,
        'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
    with open(result_path, "w") as f:
        f.write(result_str)

    for handler in logger.handlers:
        logger.removeHandler(handler)

    return {'top1': top1_acc, 'top5': top5_acc}
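
The helper compute_on_dataset is not shown; a common way to combine the two streams is to average their softmax scores clip by clip. A minimal sketch under that assumption (not necessarily how this project implements the fusion):

import torch
import torch.nn.functional as F

# Hypothetical score-level fusion of one RGB clip and one RGBDiff clip.
def fuse_two_stream(rgb_model, rgb_images, rgbdiff_model, rgbdiff_images, device):
    with torch.no_grad():
        rgb_scores = F.softmax(rgb_model(rgb_images.to(device)), dim=1)
        rgbdiff_scores = F.softmax(rgbdiff_model(rgbdiff_images.to(device)), dim=1)
    # equal-weight average; per-stream weights are another common choice
    return (rgb_scores + rgbdiff_scores) / 2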
Example #3
def train(cfg):
    # Set up environment.
    init_distributed_training(cfg)
    local_rank_id = get_local_rank()

    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.manual_seed(cfg.RNG_SEED + 10 * local_rank_id)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info('init start')
    # epoch counting starts from 1
    arguments = {"cur_epoch": 1}

    device = get_device(local_rank_id)
    model = build_recognizer(cfg, device)
    criterion = build_criterion(cfg, device)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)

    checkpointer = CheckPointer(model,
                                optimizer=optimizer,
                                scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT_DIR,
                                save_to_disk=True)
    if cfg.TRAIN.RESUME:
        logger.info('resume start')
        extra_checkpoint_data = checkpointer.load(map_location=device)
        if isinstance(extra_checkpoint_data, dict):
            arguments['cur_epoch'] = extra_checkpoint_data['cur_epoch']
            if cfg.LR_SCHEDULER.IS_WARMUP:
                logger.info('warmup start')
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(
                        lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer
                logger.info('warmup end')
        logger.info('resume end')

    data_loader = build_dataloader(cfg, is_train=True)

    logger.info('init end')
    synchronize()
    do_train(cfg, arguments, data_loader, model, criterion, optimizer,
             lr_scheduler, checkpointer, device)
Example #4
def train(gpu, args, cfg):
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size)

    logger = setup_logger(cfg.TRAIN.NAME)
    arguments = {"iteration": 0}

    torch.cuda.set_device(gpu)
    device = torch.device(f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}

    model = build_model(cfg, gpu, map_location=map_location)
    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)

    checkpointer = CheckPointer(model, optimizer=optimizer, scheduler=lr_scheduler, save_dir=cfg.OUTPUT.DIR,
                                save_to_disk=True, logger=logger)
    if args.resume:
        if is_master_proc():
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location, rank=rank)
        if extra_checkpoint_data != dict():
            arguments['iteration'] = extra_checkpoint_data['iteration']
            if cfg.LR_SCHEDULER.IS_WARMUP:
                if is_master_proc():
                    logger.info('warmup ...')
                if lr_scheduler.finished:
                    optimizer.load_state_dict(lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg, is_train=True, start_iter=arguments['iteration'])

    synchronize()
    do_train(args, cfg, arguments,
             data_loader, model, criterion, optimizer, lr_scheduler,
             checkpointer, device, logger)
    cleanup()
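
Because train(gpu, args, cfg) takes the local GPU index as its first argument, it is typically launched with torch.multiprocessing.spawn, which passes the process index as that first argument. A minimal sketch of such a launcher; the argument names (--nr, --gpus, --nodes) mirror the fields read above, and get_cfg_defaults is an assumed config helper:

import argparse
import torch.multiprocessing as mp

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nr', type=int, default=0, help='node rank')
    parser.add_argument('--gpus', type=int, default=1, help='GPUs per node')
    parser.add_argument('--nodes', type=int, default=1, help='number of nodes')
    parser.add_argument('--resume', action='store_true')
    args = parser.parse_args()
    args.world_size = args.gpus * args.nodes

    cfg = get_cfg_defaults()  # assumed project helper for loading the config
    # one training process per local GPU; each receives its GPU index as `gpu`
    mp.spawn(train, args=(args, cfg), nprocs=args.gpus, join=True)

if __name__ == '__main__':
    main()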
Example #5
File: train.py  Project: ZJCV/TRN
def train(gpu, args, cfg):
    rank = args.nr * args.gpus + gpu
    setup(rank, args.world_size, args.gpus)

    logger = setup_logger(cfg.TRAIN.NAME)
    arguments = {"iteration": 0}
    arguments['rank'] = rank

    device = torch.device(
        f'cuda:{gpu}' if torch.cuda.is_available() else 'cpu')
    map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}
    model = build_model(cfg, map_location=map_location).to(device)
    if cfg.MODEL.PRETRAINED != "":
        if rank == 0 and logger:
            logger.info(f'load pretrained: {cfg.MODEL.PRETRAINED}')
        checkpointer = CheckPointer(model, logger=logger)
        checkpointer.load(cfg.MODEL.PRETRAINED,
                          map_location=map_location,
                          rank=rank)

    if args.gpus > 1:
        model = DDP(model, device_ids=[gpu], find_unused_parameters=True)
    criterion = build_criterion(cfg)
    optimizer = build_optimizer(cfg, model)
    lr_scheduler = build_lr_scheduler(cfg, optimizer)

    checkpointer = CheckPointer(model,
                                optimizer=optimizer,
                                scheduler=lr_scheduler,
                                save_dir=cfg.OUTPUT.DIR,
                                save_to_disk=True,
                                logger=logger)
    if args.resume:
        if rank == 0:
            logger.info('resume ...')
        extra_checkpoint_data = checkpointer.load(map_location=map_location,
                                                  rank=rank)
        if extra_checkpoint_data != dict():
            arguments['iteration'] = extra_checkpoint_data['iteration']
            if cfg.LR_SCHEDULER.WARMUP:
                if rank == 0:
                    logger.info('warmup ...')
                if lr_scheduler.finished:
                    optimizer.load_state_dict(
                        lr_scheduler.after_scheduler.optimizer.state_dict())
                else:
                    optimizer.load_state_dict(
                        lr_scheduler.optimizer.state_dict())
                lr_scheduler.optimizer = optimizer
                lr_scheduler.after_scheduler.optimizer = optimizer

    data_loader = build_dataloader(cfg,
                                   train=True,
                                   start_iter=arguments['iteration'],
                                   world_size=args.world_size,
                                   rank=rank)

    model = do_train(args, cfg, arguments, data_loader, model, criterion,
                     optimizer, lr_scheduler, checkpointer, device, logger)

    if rank == 0 and not args.stop_eval:
        logger.info('Start final evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, device)

    cleanup()