Example #1
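A typical torchreid training entry point: command-line arguments are merged into a YACS-style config, logging and the random seed are set up, and the script then builds the datamanager, model, optimizer, and scheduler before handing everything to an engine.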
def main():
    # 1. Get input arguments
    args = get_args()

    # 2. Create a config instance from the args above
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    # 3. Create a DataManager instance
    datamanager = build_datamanager(cfg)

    print('Building model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        name=cfg.model.name,
        num_classes=datamanager.num_train_pids,
        loss=cfg.loss.name,
        pretrained=cfg.model.pretrained,
        use_gpu=cfg.use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        load_pretrained_weights(model, cfg.model.load_weights)

    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   **lr_scheduler_kwargs(cfg))

    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(cfg.model.resume,
                                                       model,
                                                       optimizer=optimizer,
                                                       scheduler=scheduler)

    print('Building {}-engine for {}-reid'.format(cfg.loss.name,
                                                  cfg.data.type))

    # Build engine and run
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(cfg))
Example #2
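The same training pipeline driven directly by argparse arguments (a module-level `args`) instead of a config object; note that here `resume_from_checkpoint` is called without a scheduler.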
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    datamanager = build_datamanager(args)

    print('Building model: {}'.format(args.arch))
    model = torchreid.models.build_model(
        name=args.arch,
        num_classes=datamanager.num_train_pids,
        loss=args.loss.lower(),
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = torchreid.optim.build_optimizer(model,
                                                **optimizer_kwargs(args))

    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   **lr_scheduler_kwargs(args))

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = build_engine(args, datamanager, model, optimizer, scheduler)

    engine.run(**engine_run_kwargs(args))
Example #3
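A variant of Example #1 with argument parsing inlined; model construction goes through a `model_kwargs` helper, and when training from a pretrained state the weights are loaded through the model's own `load_pretrained_weights` method.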
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file', type=str, default='', help='path to config file')
    parser.add_argument('-s', '--sources', type=str, nargs='+', help='source datasets (delimited by space)')
    parser.add_argument('-t', '--targets', type=str, nargs='+', help='target datasets (delimited by space)')
    parser.add_argument('--root', type=str, default='', help='path to data root')
    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER,
                        help='Modify config options using the command-line')
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    datamanager = build_datamanager(cfg)

    print('Building model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(**model_kwargs(cfg, datamanager.num_train_pids))
    num_params, flops = compute_model_complexity(model, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        if cfg.model.pretrained and not cfg.test.evaluate:
            state_dict = torch.load(cfg.model.load_weights)
            model.load_pretrained_weights(state_dict)
        else:
            load_pretrained_weights(model, cfg.model.load_weights)

    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer, **lr_scheduler_kwargs(cfg))

    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer, scheduler=scheduler
        )

    print('Building {}-engine for {}-reid'.format(cfg.loss.name, cfg.data.type))
    engine = build_engine(cfg, datamanager, model, optimizer, scheduler)
    engine.run(**engine_run_kwargs(cfg))
Example #4
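A learning-rate finder: an engine is wrapped in `LrFinder` to estimate a learning rate, after which the datamanager, model, optimizer, and scheduler are rebuilt (or restored from a deep copy) so training can restart cleanly with the found value.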
def run_lr_finder(cfg,
                  datamanager,
                  model,
                  optimizer,
                  scheduler,
                  classes,
                  rebuild_model=True,
                  gpu_num=1,
                  split_models=False):
    if not rebuild_model:
        backup_model = deepcopy(model)

    engine = build_engine(cfg,
                          datamanager,
                          model,
                          optimizer,
                          scheduler,
                          initial_lr=cfg.train.lr)
    lr_finder = LrFinder(engine=engine, **lr_finder_run_kwargs(cfg))
    aux_lr = lr_finder.process()

    print(f"Estimated learning rate: {aux_lr}")
    if cfg.lr_finder.stop_after:
        print("Finding learning rate finished. Terminate the training process")
        sys.exit(0)

    # Reload all parts of the training pipeline. Classification parameters
    # are not re-checked and num_train_classes is not queried a second time,
    # since both were handled above and the LR finder cannot change the
    # parameters of the datasets.
    cfg.train.lr = aux_lr
    cfg.lr_finder.enable = False
    set_random_seed(cfg.train.seed, cfg.train.deterministic)
    datamanager = build_datamanager(cfg, classes)
    num_train_classes = datamanager.num_train_pids

    if rebuild_model:
        backup_model = torchreid.models.build_model(
            **model_kwargs(cfg, num_train_classes))
        num_aux_models = len(cfg.mutual_learning.aux_configs)
        backup_model, _ = put_main_model_on_the_device(backup_model,
                                                       cfg.use_gpu, gpu_num,
                                                       num_aux_models,
                                                       split_models)

    optimizer = torchreid.optim.build_optimizer(backup_model,
                                                **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer=optimizer,
        num_iter=datamanager.num_iter,
        **lr_scheduler_kwargs(cfg))

    return cfg.train.lr, backup_model, optimizer, scheduler
Example #5
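Nearly identical to Example #2; the visible differences are that the log file name carries no timestamp and the arguments are printed as a single block rather than key by key.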
def main():
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = (torch.cuda.is_available() and not args.use_cpu)
    log_name = 'test.log' if args.evaluate else 'train.log'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))
    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    datamanager = build_datamanager(args)

    print('Building model: {}'.format(args.arch))
    model = torchreid.models.build_model(
        name=args.arch,
        num_classes=datamanager.num_train_pids,
        loss=args.loss.lower(),
        pretrained=(not args.no_pretrained),
        use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = torchreid.optim.build_optimizer(model,
                                                **optimizer_kwargs(args))

    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   **lr_scheduler_kwargs(args))

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(args.loss, args.app))
    engine = build_engine(args, datamanager, model, optimizer, scheduler)

    engine.run(**engine_run_kwargs(args))
Example #6
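A neural-architecture-search training script: the model comes from `osnet_models` (presumably an OSNet search space), training runs through `ImageSoftmaxNASEngine` with lambda-decay settings taken from `cfg.nas`, and the found architecture is materialized at the end with `build_child_graph`.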
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        '--config-file', type=str, default='', help='path to config file'
    )
    parser.add_argument(
        '-s',
        '--sources',
        type=str,
        nargs='+',
        help='source datasets (delimited by space)'
    )
    parser.add_argument(
        '-t',
        '--targets',
        type=str,
        nargs='+',
        help='target datasets (delimited by space)'
    )
    parser.add_argument(
        '--transforms', type=str, nargs='+', help='data augmentation'
    )
    parser.add_argument(
        '--root', type=str, default='', help='path to data root'
    )
    parser.add_argument(
        '--gpu-devices',
        type=str,
        default='',
        help='comma-separated gpu device ids; all available gpus are used if empty'
    )
    parser.add_argument(
        'opts',
        default=None,
        nargs=argparse.REMAINDER,
        help='Modify config options using the command-line'
    )
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    if cfg.use_gpu and args.gpu_devices:
        # if gpu_devices is not specified, all available gpus will be used
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(cfg))

    print('Building model: {}'.format(cfg.model.name))
    model = osnet_models.build_model(
        cfg.model.name, num_classes=datamanager.num_train_pids
    )
    num_params, flops = compute_model_complexity(
        model, (1, 3, cfg.data.height, cfg.data.width)
    )
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if cfg.use_gpu:
        model = nn.DataParallel(model).cuda()

    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer, **lr_scheduler_kwargs(cfg)
    )

    if cfg.model.resume and check_isfile(cfg.model.resume):
        cfg.train.start_epoch = resume_from_checkpoint(
            cfg.model.resume, model, optimizer=optimizer
        )

    print('Building NAS engine')
    engine = ImageSoftmaxNASEngine(
        datamanager,
        model,
        optimizer,
        scheduler=scheduler,
        use_gpu=cfg.use_gpu,
        label_smooth=cfg.loss.softmax.label_smooth,
        mc_iter=cfg.nas.mc_iter,
        init_lmda=cfg.nas.init_lmda,
        min_lmda=cfg.nas.min_lmda,
        lmda_decay_step=cfg.nas.lmda_decay_step,
        lmda_decay_rate=cfg.nas.lmda_decay_rate,
        fixed_lmda=cfg.nas.fixed_lmda
    )
    engine.run(**engine_run_kwargs(cfg))

    print('*** Display the found architecture ***')
    if cfg.use_gpu:
        model.module.build_child_graph()
    else:
        model.build_child_graph()
Example #7
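Deep mutual learning: two copies of the same model, each with its own optimizer, scheduler, and optional initial weights, trained jointly by `ImageDMLEngine`.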
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file',
                        type=str,
                        default='',
                        help='path to config file')
    parser.add_argument('-s',
                        '--sources',
                        type=str,
                        nargs='+',
                        help='source datasets (delimited by space)')
    parser.add_argument('-t',
                        '--targets',
                        type=str,
                        nargs='+',
                        help='target datasets (delimited by space)')
    parser.add_argument('--transforms',
                        type=str,
                        nargs='+',
                        help='data augmentation')
    parser.add_argument('--root',
                        type=str,
                        default='',
                        help='path to data root')
    parser.add_argument('opts',
                        default=None,
                        nargs=argparse.REMAINDER,
                        help='Modify config options using the command-line')
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        cfg.merge_from_file(args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(cfg))

    print('Building model-1: {}'.format(cfg.model.name))
    model1 = torchreid.models.build_model(
        name=cfg.model.name,
        num_classes=datamanager.num_train_pids,
        loss=cfg.loss.name,
        pretrained=cfg.model.pretrained,
        use_gpu=cfg.use_gpu)
    num_params, flops = compute_model_complexity(
        model1, (1, 3, cfg.data.height, cfg.data.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    print('Copying model-1 to model-2')
    model2 = copy.deepcopy(model1)

    if cfg.model.load_weights1 and check_isfile(cfg.model.load_weights1):
        load_pretrained_weights(model1, cfg.model.load_weights1)

    if cfg.model.load_weights2 and check_isfile(cfg.model.load_weights2):
        load_pretrained_weights(model2, cfg.model.load_weights2)

    if cfg.use_gpu:
        model1 = nn.DataParallel(model1).cuda()
        model2 = nn.DataParallel(model2).cuda()

    optimizer1 = torchreid.optim.build_optimizer(model1,
                                                 **optimizer_kwargs(cfg))
    scheduler1 = torchreid.optim.build_lr_scheduler(optimizer1,
                                                    **lr_scheduler_kwargs(cfg))

    optimizer2 = torchreid.optim.build_optimizer(model2,
                                                 **optimizer_kwargs(cfg))
    scheduler2 = torchreid.optim.build_lr_scheduler(optimizer2,
                                                    **lr_scheduler_kwargs(cfg))

    if cfg.model.resume1 and check_isfile(cfg.model.resume1):
        cfg.train.start_epoch = resume_from_checkpoint(cfg.model.resume1,
                                                       model1,
                                                       optimizer=optimizer1,
                                                       scheduler=scheduler1)

    if cfg.model.resume2 and check_isfile(cfg.model.resume2):
        resume_from_checkpoint(cfg.model.resume2,
                               model2,
                               optimizer=optimizer2,
                               scheduler=scheduler2)

    print('Building DML-engine for image-reid')
    engine = ImageDMLEngine(datamanager,
                            model1,
                            optimizer1,
                            scheduler1,
                            model2,
                            optimizer2,
                            scheduler2,
                            margin=cfg.loss.triplet.margin,
                            weight_t=cfg.loss.triplet.weight_t,
                            weight_x=cfg.loss.triplet.weight_x,
                            weight_ml=cfg.loss.dml.weight_ml,
                            use_gpu=cfg.use_gpu,
                            label_smooth=cfg.loss.softmax.label_smooth,
                            deploy=cfg.model.deploy)
    engine.run(**engine_run_kwargs(cfg))
Example #8
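A full training entry point with optional NNCF quantization and pruning: NNCF changes are applied to the config and model before the optimizer is built, and an optional LR-finder pass (see Example #4) may replace the model, optimizer, and scheduler before `run_training` starts.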
def main():
    parser = build_base_argparser()
    parser.add_argument('-e',
                        '--auxiliary-models-cfg',
                        type=str,
                        nargs='*',
                        default='',
                        help='path to extra config files')
    parser.add_argument('--split-models',
                        action='store_true',
                        help='whether to place each model on its own GPU')
    parser.add_argument('--enable_quantization',
                        action='store_true',
                        help='Enable NNCF quantization algorithm')
    parser.add_argument('--enable_pruning',
                        action='store_true',
                        help='Enable NNCF pruning algorithm')
    parser.add_argument(
        '--aux-config-opts',
        nargs='+',
        default=None,
        help='Modify aux config options using the command-line')
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)

    is_nncf_used = args.enable_quantization or args.enable_pruning
    if is_nncf_used:
        print('Using NNCF -- making NNCF changes in config')
        cfg = make_nncf_changes_in_config(cfg, args.enable_quantization,
                                          args.enable_pruning, args.opts)

    set_random_seed(cfg.train.seed, cfg.train.deterministic)

    log_name = 'test.log' if cfg.test.evaluate else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    num_aux_models = len(cfg.mutual_learning.aux_configs)
    datamanager = build_datamanager(cfg, args.classes)
    num_train_classes = datamanager.num_train_pids

    print('Building main model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        **model_kwargs(cfg, num_train_classes))
    macs, num_params = get_model_complexity_info(
        model, (3, cfg.data.height, cfg.data.width),
        as_strings=False,
        verbose=False,
        print_per_layer_stat=False)
    print('Main model complexity: params={:,} flops={:,}'.format(
        num_params, macs * 2))

    aux_lr = cfg.train.lr  # placeholder for aux models; may be overwritten by the NNCF part below
    if is_nncf_used:
        print('Begin making NNCF changes in model')
        if cfg.use_gpu:
            model.cuda()

        compression_ctrl, model, cfg, aux_lr, nncf_metainfo = \
            make_nncf_changes_in_training(model, cfg,
                                          args.classes,
                                          args.opts)

        should_freeze_aux_models = True
        print(f'should_freeze_aux_models = {should_freeze_aux_models}')
        print('End making NNCF changes in model')
    else:
        compression_ctrl = None
        should_freeze_aux_models = False
        nncf_metainfo = None
    # Create the optimizer and scheduler after the NNCF part, since NNCF
    # may change some parameters.
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))

    if cfg.lr_finder.enable and not cfg.model.resume:
        scheduler = None
    else:
        scheduler = torchreid.optim.build_lr_scheduler(
            optimizer=optimizer,
            num_iter=datamanager.num_iter,
            **lr_scheduler_kwargs(cfg))
    # Loading model (and optimizer and scheduler in case of resuming training).
    # Note that if NNCF is used, loading is done inside NNCF part, so loading here is not required.
    if cfg.model.resume and check_isfile(cfg.model.resume) and not is_nncf_used:
        device_ = 'cuda' if cfg.use_gpu else 'cpu'
        cfg.train.start_epoch = resume_from_checkpoint(cfg.model.resume,
                                                       model,
                                                       optimizer=optimizer,
                                                       scheduler=scheduler,
                                                       device=device_)
    elif cfg.model.load_weights and not is_nncf_used:
        load_pretrained_weights(model, cfg.model.load_weights)

    if cfg.model.type == 'classification':
        check_classification_classes(model,
                                     datamanager,
                                     args.classes,
                                     test_only=cfg.test.evaluate)

    model, extra_device_ids = put_main_model_on_the_device(
        model, cfg.use_gpu, args.gpu_num, num_aux_models, args.split_models)

    if cfg.lr_finder.enable and not cfg.test.evaluate and not cfg.model.resume:
        aux_lr, model, optimizer, scheduler = run_lr_finder(
            cfg,
            datamanager,
            model,
            optimizer,
            scheduler,
            args.classes,
            rebuild_model=True,
            gpu_num=args.gpu_num,
            split_models=args.split_models)

    log_dir = cfg.data.tb_log_dir if cfg.data.tb_log_dir else cfg.data.save_dir
    run_training(cfg,
                 datamanager,
                 model,
                 optimizer,
                 scheduler,
                 extra_device_ids,
                 aux_lr,
                 tb_writer=SummaryWriter(log_dir=log_dir),
                 should_freeze_aux_models=should_freeze_aux_models,
                 nncf_metainfo=nncf_metainfo,
                 compression_ctrl=compression_ctrl)
Example #9
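A `train` method, apparently from an OTE (OpenVINO Training Extensions) classification task: the dataset subsets are wrapped in `OTEClassificationDataset`, a deep copy of the internal model is trained (optionally after an LR-finder pass), and the best checkpoint is reloaded before the final performance is reported.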
    def train(self,
              dataset: DatasetEntity,
              output_model: ModelEntity,
              train_parameters: Optional[TrainParameters] = None):
        """ Trains a model on a dataset """

        train_model = deepcopy(self._model)

        if train_parameters is not None:
            update_progress_callback = train_parameters.update_progress
        else:
            update_progress_callback = default_progress_callback
        time_monitor = TrainingProgressCallback(
            update_progress_callback,
            num_epoch=self._cfg.train.max_epoch,
            num_train_steps=math.ceil(
                len(dataset.get_subset(Subset.TRAINING)) /
                self._cfg.train.batch_size),
            num_val_steps=0,
            num_test_steps=0)

        self.metrics_monitor = DefaultMetricsMonitor()
        self.stop_callback.reset()

        set_random_seed(self._cfg.train.seed)
        train_subset = dataset.get_subset(Subset.TRAINING)
        val_subset = dataset.get_subset(Subset.VALIDATION)
        self._cfg.custom_datasets.roots = [
            OTEClassificationDataset(train_subset,
                                     self._labels,
                                     self._multilabel,
                                     keep_empty_label=self._empty_label
                                     in self._labels),
            OTEClassificationDataset(val_subset,
                                     self._labels,
                                     self._multilabel,
                                     keep_empty_label=self._empty_label
                                     in self._labels)
        ]
        datamanager = torchreid.data.ImageDataManager(
            **imagedata_kwargs(self._cfg))

        num_aux_models = len(self._cfg.mutual_learning.aux_configs)

        if self._cfg.use_gpu:
            main_device_ids = list(range(self.num_devices))
            extra_device_ids = [main_device_ids for _ in range(num_aux_models)]
            train_model = DataParallel(train_model,
                                       device_ids=main_device_ids,
                                       output_device=0).cuda(
                                           main_device_ids[0])
        else:
            extra_device_ids = [None for _ in range(num_aux_models)]

        optimizer = torchreid.optim.build_optimizer(
            train_model, **optimizer_kwargs(self._cfg))

        if self._cfg.lr_finder.enable:
            scheduler = None
        else:
            scheduler = torchreid.optim.build_lr_scheduler(
                optimizer,
                num_iter=datamanager.num_iter,
                **lr_scheduler_kwargs(self._cfg))

        if self._cfg.lr_finder.enable:
            _, train_model, optimizer, scheduler = run_lr_finder(
                self._cfg, datamanager, train_model, optimizer, scheduler,
                None, rebuild_model=False, gpu_num=self.num_devices,
                split_models=False)

        _, final_acc = run_training(self._cfg,
                                    datamanager,
                                    train_model,
                                    optimizer,
                                    scheduler,
                                    extra_device_ids,
                                    self._cfg.train.lr,
                                    tb_writer=self.metrics_monitor,
                                    perf_monitor=time_monitor,
                                    stop_callback=self.stop_callback)

        training_metrics = self._generate_training_metrics_group()

        self.metrics_monitor.close()
        if self.stop_callback.check_stop():
            logger.info('Training cancelled.')
            return

        logger.info("Training finished.")

        best_snap_path = os.path.join(self._scratch_space, 'best.pth')
        if os.path.isfile(best_snap_path):
            load_pretrained_weights(self._model, best_snap_path)

        for filename in os.listdir(self._scratch_space):
            match = re.match(r'best_(aux_model_[0-9]+\.pth)', filename)
            if match:
                aux_model_name = match.group(1)
                best_aux_snap_path = os.path.join(self._scratch_space,
                                                  filename)
                self._aux_model_snap_paths[aux_model_name] = best_aux_snap_path

        self.save_model(output_model)
        performance = Performance(score=ScoreMetric(value=final_acc,
                                                    name="accuracy"),
                                  dashboard_metrics=training_metrics)
        logger.info(f'FINAL MODEL PERFORMANCE {performance}')
        output_model.performance = performance
Example #10
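An evaluation entry point that handles three model kinds: a plain PyTorch checkpoint, an NNCF-compressed checkpoint, and an OpenVINO IR (`.xml`), the latter executed on CPU through `VectorCNN`.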
def main():
    parser = build_base_argparser()
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)

    is_ie_model = cfg.model.load_weights.endswith('.xml')
    if not is_ie_model:
        compression_hyperparams = get_compression_hyperparams(
            cfg.model.load_weights)
        is_nncf_used = (compression_hyperparams['enable_quantization']
                        or compression_hyperparams['enable_pruning'])

        if is_nncf_used:
            print('Using NNCF -- making NNCF changes in config')
            cfg = make_nncf_changes_in_config(
                cfg, compression_hyperparams['enable_quantization'],
                compression_hyperparams['enable_pruning'], args.opts)
    else:
        is_nncf_used = False

    set_random_seed(cfg.train.seed)

    log_name = 'test.log' + time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))
    datamanager = torchreid.data.ImageDataManager(filter_classes=args.classes,
                                                  **imagedata_kwargs(cfg))
    num_classes = len(
        datamanager.test_loader[cfg.data.targets[0]]['query'].dataset.classes)
    cfg.train.ema.enable = False
    if not is_ie_model:
        model = torchreid.models.build_model(**model_kwargs(cfg, num_classes))
        load_pretrained_weights(model, cfg.model.load_weights)
        if is_nncf_used:
            print('Begin making NNCF changes in model')
            model = make_nncf_changes_in_eval(model, cfg)
            print('End making NNCF changes in model')
        if cfg.use_gpu:
            num_devices = min(torch.cuda.device_count(), args.gpu_num)
            main_device_ids = list(range(num_devices))
            model = DataParallel(model,
                                 device_ids=main_device_ids,
                                 output_device=0).cuda(main_device_ids[0])
    else:
        from torchreid.utils.ie_tools import VectorCNN
        from openvino.inference_engine import IECore
        cfg.test.batch_size = 1
        model = VectorCNN(IECore(),
                          cfg.model.load_weights,
                          'CPU',
                          switch_rb=True,
                          **model_kwargs(cfg, num_classes))
        for _, dataloader in datamanager.test_loader.items():
            dataloader['query'].dataset.transform.transforms = \
                dataloader['query'].dataset.transform.transforms[:-2]

    if cfg.model.type == 'classification':
        check_classification_classes(model,
                                     datamanager,
                                     args.classes,
                                     test_only=True)

    engine = build_engine(cfg=cfg,
                          datamanager=datamanager,
                          model=model,
                          optimizer=None,
                          scheduler=None)
    engine.test(0,
                dist_metric=cfg.test.dist_metric,
                normalize_feature=cfg.test.normalize_feature,
                visrank=cfg.test.visrank,
                visrank_topk=cfg.test.visrank_topk,
                save_dir=cfg.data.save_dir,
                use_metric_cuhk03=cfg.cuhk03.use_metric_cuhk03,
                ranks=(1, 5, 10, 20),
                rerank=cfg.test.rerank)
Example #11
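A pedestrian-attribute training loop written out by hand: an optional weighted BCE loss computed from attribute frequencies, manual resume logic, an explicit epoch loop over `train`/`test`, and checkpointing keyed on the best label-mA.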
def main():
    global args

    set_random_seed(args.seed)
    use_gpu = torch.cuda.is_available() and not args.use_cpu
    log_name = 'test.log' if args.evaluate else 'train.log'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))

    print('** Arguments **')
    arg_keys = list(args.__dict__.keys())
    arg_keys.sort()
    for key in arg_keys:
        print('{}: {}'.format(key, args.__dict__[key]))
    print('\n')
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if use_gpu:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    dataset_vars = init_dataset(use_gpu)
    trainloader, valloader, testloader, num_attrs, attr_dict = dataset_vars

    if args.weighted_bce:
        print('Use weighted binary cross entropy')
        print('Computing the weights ...')
        bce_weights = torch.zeros(num_attrs, dtype=torch.float)
        for _, attrs, _ in trainloader:
            bce_weights += attrs.sum(0)  # sum along the batch dim
        bce_weights /= len(trainloader) * args.batch_size
        print('Sample ratio for each attribute: {}'.format(bce_weights))
        bce_weights = torch.exp(-1 * bce_weights)
        print('BCE weights: {}'.format(bce_weights))
        bce_weights = bce_weights.expand(args.batch_size, num_attrs)
        criterion = nn.BCEWithLogitsLoss(weight=bce_weights)

    else:
        print('Use plain binary cross entropy')
        criterion = nn.BCEWithLogitsLoss()

    print('Building model: {}'.format(args.arch))
    model = models.build_model(args.arch,
                               num_attrs,
                               pretrained=not args.no_pretrained,
                               use_gpu=use_gpu)
    num_params, flops = compute_model_complexity(
        model, (1, 3, args.height, args.width))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if use_gpu:
        model = nn.DataParallel(model).cuda()
        criterion = criterion.cuda()

    if args.evaluate:
        test(model, testloader, attr_dict, use_gpu)
        return

    optimizer = torchreid.optim.build_optimizer(model,
                                                **optimizer_kwargs(args))
    scheduler = torchreid.optim.build_lr_scheduler(optimizer,
                                                   **lr_scheduler_kwargs(args))

    start_epoch = args.start_epoch
    best_result = -np.inf
    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        best_result = checkpoint['label_mA']
        print('Loaded checkpoint from "{}"'.format(args.resume))
        print('- start epoch: {}'.format(start_epoch))
        print('- label_mA: {}'.format(best_result))

    time_start = time.time()

    for epoch in range(start_epoch, args.max_epoch):
        train(epoch, model, criterion, optimizer, scheduler, trainloader,
              use_gpu)
        test_outputs = test(model, testloader, attr_dict, use_gpu)
        label_mA = test_outputs[0]
        is_best = label_mA > best_result
        if is_best:
            best_result = label_mA

        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'epoch': epoch + 1,
                'label_mA': label_mA,
                'optimizer': optimizer.state_dict(),
            },
            args.save_dir,
            is_best=is_best)

    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
Example #12
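Hyperparameter search with Optuna: a TPE-sampled study maximizes the given objective over `cfg.lr_finder.n_trials` trials and reports the results via `finish_process` whether the search completes or is interrupted.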
def main():
    # parse arguments
    parser = build_base_argparser()
    parser.add_argument('-e',
                        '--auxiliary-models-cfg',
                        type=str,
                        nargs='*',
                        default='',
                        help='path to extra config files')
    parser.add_argument('--split-models',
                        action='store_true',
                        help='whether to place each model on its own GPU')
    parser.add_argument(
        '--aux-config-opts',
        nargs='+',
        default=None,
        help='Modify aux config options using the command-line')
    parser.add_argument('--epochs',
                        default=10,
                        type=int,
                        help='number of epochs')

    args = parser.parse_args()
    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available() and args.gpu_num > 0
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)

    set_random_seed(cfg.train.seed, cfg.train.deterministic)

    log_name = 'optuna.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(cfg.data.save_dir, log_name))

    print('Show configuration\n{}\n'.format(cfg))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    sampler = TPESampler(n_startup_trials=5, seed=True)  # note: a boolean seed acts as seed=1; an explicit int would be clearer
    study = optuna.create_study(study_name='classification task',
                                direction="maximize",
                                sampler=sampler)
    objective_partial = partial(objective, cfg, args)
    try:
        start_time = time.time()
        study.optimize(objective_partial,
                       n_trials=cfg.lr_finder.n_trials,
                       timeout=None)
        elapsed = round(time.time() - start_time)
        print(
            f"--- optimization is finished: {datetime.timedelta(seconds=elapsed)} ---"
        )

    except KeyboardInterrupt:
        finish_process(study)

    else:
        finish_process(study)
Example #13
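An `optimize` method, apparently from the same OTE task as Example #9: after validating that NNCF compression is applicable, it wraps the model with `wrap_nncf_model`, rescales the learning rate for compression-aware fine-tuning, and runs training with frozen aux models.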
    def optimize(
        self,
        optimization_type: OptimizationType,
        dataset: DatasetEntity,
        output_model: ModelEntity,
        optimization_parameters: Optional[OptimizationParameters],
    ):
        """ Optimize a model on a dataset """
        if optimization_type is not OptimizationType.NNCF:
            raise RuntimeError('NNCF is the only supported optimization')
        if self._compression_ctrl:
            raise RuntimeError('The model is already optimized. NNCF requires the original model for optimization.')
        if self._cfg.train.ema.enable:
            raise RuntimeError('EMA model could not be used together with NNCF compression')
        if self._cfg.lr_finder.enable:
            raise RuntimeError('LR finder could not be used together with NNCF compression')

        aux_pretrained_dicts = self._load_aux_models_data(self._task_environment.model)
        num_aux_models = len(self._cfg.mutual_learning.aux_configs)
        num_aux_pretrained_dicts = len(aux_pretrained_dicts)
        if num_aux_models != num_aux_pretrained_dicts:
            raise RuntimeError('The pretrained weights are not provided for all aux models.')

        if optimization_parameters is not None:
            update_progress_callback = optimization_parameters.update_progress
        else:
            update_progress_callback = default_progress_callback
        time_monitor = TrainingProgressCallback(update_progress_callback, num_epoch=self._cfg.train.max_epoch,
                                                num_train_steps=math.ceil(len(dataset.get_subset(Subset.TRAINING)) /
                                                                          self._cfg.train.batch_size),
                                                num_val_steps=0, num_test_steps=0)

        self.metrics_monitor = DefaultMetricsMonitor()
        self.stop_callback.reset()

        set_random_seed(self._cfg.train.seed)
        train_subset = dataset.get_subset(Subset.TRAINING)
        val_subset = dataset.get_subset(Subset.VALIDATION)
        self._cfg.custom_datasets.roots = [OTEClassificationDataset(train_subset, self._labels, self._multilabel,
                                                                    keep_empty_label=self._empty_label in self._labels),
                                           OTEClassificationDataset(val_subset, self._labels, self._multilabel,
                                                                    keep_empty_label=self._empty_label in self._labels)]
        datamanager = torchreid.data.ImageDataManager(**imagedata_kwargs(self._cfg))

        self._compression_ctrl, self._model, self._nncf_metainfo = \
            wrap_nncf_model(self._model, self._cfg, datamanager_for_init=datamanager)

        self._cfg.train.lr = calculate_lr_for_nncf_training(self._cfg, self._initial_lr, False)

        train_model = self._model
        if self._cfg.use_gpu:
            main_device_ids = list(range(self.num_devices))
            extra_device_ids = [main_device_ids for _ in range(num_aux_models)]
            train_model = DataParallel(train_model, device_ids=main_device_ids,
                                       output_device=0).cuda(main_device_ids[0])
        else:
            extra_device_ids = [None for _ in range(num_aux_models)]

        optimizer = torchreid.optim.build_optimizer(train_model, **optimizer_kwargs(self._cfg))

        scheduler = torchreid.optim.build_lr_scheduler(optimizer, num_iter=datamanager.num_iter,
                                                       **lr_scheduler_kwargs(self._cfg))

        logger.info('Start training')
        run_training(self._cfg, datamanager, train_model, optimizer,
                     scheduler, extra_device_ids, self._cfg.train.lr,
                     should_freeze_aux_models=True,
                     aux_pretrained_dicts=aux_pretrained_dicts,
                     tb_writer=self.metrics_monitor,
                     perf_monitor=time_monitor,
                     stop_callback=self.stop_callback,
                     nncf_metainfo=self._nncf_metainfo,
                     compression_ctrl=self._compression_ctrl)

        self.metrics_monitor.close()
        if self.stop_callback.check_stop():
            logger.info('Training cancelled.')
            return

        logger.info('Training completed')

        self.save_model(output_model)

        output_model.model_format = ModelFormat.BASE_FRAMEWORK
        output_model.optimization_type = self._optimization_type
        output_model.optimization_methods = self._optimization_methods
        output_model.precision = self._precision
Example #14
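The familiar training pipeline configured from a plain YAML file instead of argparse/YACS; the experiment sub-folder of `save_dir` is derived from the config file's name.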
def main():

    # Load model configuration
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--config',
                        required=True,
                        help='path to configuration file')
    args = parser.parse_args()
    with open(args.config, "r") as ymlfile:
        config = yaml.load(ymlfile, Loader=yaml.FullLoader)

    # Append a sub-folder named after the config file to config["save_dir"].
    # For example, if config["save_dir"] is "logs" and the config file is
    # "exp01.yaml", save_dir becomes "logs/exp01". This way save_dir does not
    # need to be edited in every config file; it is derived automatically
    # from the config file's name.
    experiment_number = pathlib.Path(args.config).stem
    config["save_dir"] = os.path.join(config["save_dir"], experiment_number)

    # Set random seeds
    set_random_seed(config["seed"])

    # Set up GPU
    if not config["use_avai_gpus"]:
        os.environ['CUDA_VISIBLE_DEVICES'] = config["gpu_devices"]
    use_gpu = torch.cuda.is_available() and not config["use_cpu"]

    # Set up log files
    log_name = 'test.log' if config["evaluate"] else 'train.log'
    log_name += time.strftime('-%Y-%m-%d-%H-%M-%S')
    sys.stdout = Logger(osp.join(config["save_dir"], log_name))

    # Prepare for training
    print('==========\nArgs:{}\n=========='.format(config))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))
    if use_gpu:
        torch.backends.cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    # Build datamanager and model
    datamanager = build_datamanager(config)

    print('Building model: {}'.format(config["arch"]))
    model = torchreid.models.build_model(
        name=config["arch"],
        num_classes=datamanager.num_train_pids,
        loss=config["loss"].lower(),
        pretrained=(not config["no_pretrained"]),
        use_gpu=use_gpu)

    # Compute model complexity
    num_params, flops = compute_model_complexity(
        model, (1, 3, config["height"], config["width"]))
    print('Model complexity: params={:,} flops={:,}'.format(num_params, flops))

    # Load pretrained weights if necessary
    if config["load_weights"] and check_isfile(config["load_weights"]):
        load_pretrained_weights(model, config["load_weights"])

    # Set up multi-gpu
    if use_gpu:
        model = nn.DataParallel(model).cuda()

    # Model settings
    optimizer = torchreid.optim.build_optimizer(model,
                                                **optimizer_kwargs(config))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer, **lr_scheduler_kwargs(config))

    if config["resume"] and check_isfile(config["resume"]):
        config["start_epoch"] = resume_from_checkpoint(config["resume"],
                                                       model,
                                                       optimizer=optimizer)

    print('Building {}-engine for {}-reid'.format(config["loss"],
                                                  config["app"]))
    engine = build_engine(config, datamanager, model, optimizer, scheduler)

    engine.run(**engine_run_kwargs(config))
Example #15
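A model-complexity reporting tool: it builds the model, measures parameters and FLOPs with `get_model_complexity_info`, and optionally dumps the numbers to a JSON file given via `--out`.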
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--config-file',
                        type=str,
                        default='',
                        required=True,
                        help='path to config file')
    parser.add_argument('--custom-roots',
                        type=str,
                        nargs='+',
                        help='paths to custom datasets or their annotations '
                             '(delimited by space)')
    parser.add_argument('--custom-types',
                        type=str,
                        nargs='+',
                        help='types of custom datasets (delimited by space)')
    parser.add_argument('--custom-names',
                        type=str,
                        nargs='+',
                        help='names of custom datasets (delimited by space)')
    parser.add_argument('--root',
                        type=str,
                        default='',
                        help='path to data root')
    parser.add_argument('--classes',
                        type=str,
                        nargs='+',
                        help='name of classes in classification dataset')
    parser.add_argument('--out',
                        help='path to the output JSON file with complexity metrics')
    parser.add_argument('opts',
                        default=None,
                        nargs=argparse.REMAINDER,
                        help='Modify config options using the command-line')
    args = parser.parse_args()

    cfg = get_default_config()
    cfg.use_gpu = torch.cuda.is_available()
    if args.config_file:
        merge_from_files_with_base(cfg, args.config_file)
    reset_config(cfg, args)
    cfg.merge_from_list(args.opts)
    set_random_seed(cfg.train.seed)

    print('Show configuration\n{}\n'.format(cfg))
    print('Collecting env info ...')
    print('** System info **\n{}\n'.format(collect_env_info()))

    if cfg.use_gpu:
        torch.backends.cudnn.benchmark = True

    datamanager = build_datamanager(cfg, args.classes)
    num_train_classes = datamanager.num_train_pids

    print('Building main model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        **model_kwargs(cfg, num_train_classes))
    macs, num_params = get_model_complexity_info(
        model, (3, cfg.data.height, cfg.data.width),
        as_strings=False,
        verbose=False,
        print_per_layer_stat=False)
    print('Main model complexity: params={:.2f}M flops={:.2f}G'.format(
        num_params / 10**6, macs * 2 / 10**9))

    if args.out:
        out = list()
        out.append({
            'key': 'size',
            'display_name': 'Size',
            'value': num_params / 10**6,
            'unit': 'Mp'
        })
        out.append({
            'key': 'complexity',
            'display_name': 'Complexity',
            'value': 2 * macs / 10**9,
            'unit': 'GFLOPs'
        })
        print('dump to ' + args.out)
        with open(args.out, 'w') as write_file:
            json.dump(out, write_file, indent=4)