def test_case_calculate_fid_stat_CIFAR():
    from template_lib.d2.data import build_dataset_mapper
    from template_lib.d2template.trainer.base_trainer import build_detection_test_loader
    from template_lib.v2.GAN.evaluation import build_GAN_metric
    from template_lib.d2.utils.d2_utils import D2Utils
    from template_lib.v2.config_cfgnode import global_cfg

    from detectron2.utils import comm
    from detectron2.utils import logger
    logger.setup_logger('d2')

    cfg = D2Utils.create_cfg()
    cfg.update(global_cfg)
    global_cfg.merge_from_dict(cfg)

    # fmt: off
    dataset_name                 = cfg.dataset_name
    IMS_PER_BATCH                = cfg.IMS_PER_BATCH
    img_size                     = cfg.img_size
    dataset_mapper_cfg           = cfg.dataset_mapper_cfg
    GAN_metric                   = cfg.GAN_metric
    # fmt: on

    num_workers = comm.get_world_size()
    batch_size = IMS_PER_BATCH // num_workers

    dataset_mapper = build_dataset_mapper(dataset_mapper_cfg, img_size=img_size)
    data_loader = build_detection_test_loader(
      cfg, dataset_name=dataset_name, batch_size=batch_size, mapper=dataset_mapper)

    FID_IS_tf = build_GAN_metric(GAN_metric)
    FID_IS_tf.calculate_fid_stat_of_dataloader(data_loader=data_loader)

    comm.synchronize()

    pass
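The keys read in the "# fmt: off" block come from the merged global_cfg. A minimal sketch of the corresponding config, with hypothetical values (only the key names are taken from the code above):

# Hypothetical config for test_case_calculate_fid_stat_CIFAR (values are illustrative).
fid_stat_cfg = dict(
    dataset_name='cifar10_train',                         # registered dataset name (assumed)
    IMS_PER_BATCH=128,                                     # total batch size across processes
    img_size=32,                                           # CIFAR resolution
    dataset_mapper_cfg=dict(name='ImageDatasetMapper'),    # hypothetical mapper config
    GAN_metric=dict(name='FIDISScore'),                    # hypothetical metric config
)
# global_cfg.merge_from_dict(fid_stat_cfg)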
Example 2
  def test_extract_ImageNet_1000x50(self):
    """
    Usage:
        proj_root=moco-exp
        python template_lib/modelarts/scripts/copy_tool.py \
          -s s3://bucket-7001/ZhouPeng/codes/$proj_root -d /cache/$proj_root -t copytree
        cd /cache/$proj_root

        export CUDA_VISIBLE_DEVICES=0
        export TIME_STR=0
        export PYTHONPATH=./
        python -c "from template_lib.proj.imagenet.tests.test_imagenet import Testing_PrepareImageNet;\
          Testing_PrepareImageNet().test_extract_ImageNet_1000x50()"

    :return:
    """
    # Module-level imports assumed by this test (os, sys, shutil, tqdm, template_lib.utils).
    import os, sys, shutil
    import tqdm
    from template_lib import utils

    if 'CUDA_VISIBLE_DEVICES' not in os.environ:
      os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    if 'TIME_STR' not in os.environ:
      os.environ['TIME_STR'] = '0' if utils.is_debugging() else '0'
    from template_lib.v2.config_cfgnode.argparser import \
      (get_command_and_outdir, setup_outdir_and_yaml, get_append_cmd_str, start_cmd_run)
    from template_lib.v2.config_cfgnode import update_parser_defaults_from_yaml, global_cfg
    from template_lib.modelarts import modelarts_utils

    command, outdir = get_command_and_outdir(self, func_name=sys._getframe().f_code.co_name, file=__file__)
    argv_str = f"""
                --tl_config_file template_lib/proj/imagenet/tests/configs/PrepareImageNet.yaml
                --tl_command {command}
                --tl_outdir {outdir}
                """
    args, cfg = setup_outdir_and_yaml(argv_str, return_cfg=True)
    global_cfg.merge_from_dict(cfg)
    global_cfg.merge_from_dict(vars(args))

    modelarts_utils.setup_tl_outdir_obs(global_cfg)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_download', {}), global_cfg=global_cfg)

    train_dir = f'{cfg.data_dir}/train'
    counter_cls = 0
    for rootdir, subdir, files in os.walk(train_dir):
      if len(subdir) == 0:
        counter_cls += 1
        extracted_files = sorted(files)[:cfg.num_per_class]
        for file in tqdm.tqdm(extracted_files, desc=f'class: {counter_cls}'):
          img_path = os.path.join(rootdir, file)
          img_rel_path = os.path.relpath(img_path, cfg.data_dir)
          saved_img_path = f'{cfg.saved_dir}/{os.path.dirname(img_rel_path)}'
          os.makedirs(saved_img_path, exist_ok=True)
          shutil.copy(img_path, saved_img_path)
      pass

    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_upload', {}), global_cfg=global_cfg, download=False)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
    pass
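The extraction loop above only consumes data_dir, num_per_class, and saved_dir from the merged config. A hypothetical sketch of those entries (paths and values are illustrative, not the repository's actual YAML):

# Hypothetical config values for the ImageNet subset extraction (illustrative only).
extract_cfg = dict(
    data_dir='/cache/ImageNet',            # root that contains the train/ directory (assumed path)
    num_per_class=50,                      # keep the first 50 images of every class
    saved_dir='/cache/ImageNet_1000x50',   # destination; class sub-dirs are mirrored (assumed path)
)
# global_cfg.merge_from_dict(extract_cfg)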
Example 3
def main():
    update_parser_defaults_from_yaml(parser)
    args = parser.parse_args()
    global_cfg.merge_from_dict(vars(args))
    modelarts_utils.setup_tl_outdir_obs(global_cfg)
    modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)
    modelarts_utils.prepare_dataset(global_cfg.get('modelarts_download', {}),
                                    global_cfg=global_cfg)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker,
                 nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
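mp.spawn prepends the process index as the first argument of the target function, so the call above effectively runs main_worker(gpu, ngpus_per_node, args) once per local GPU. A minimal sketch of that dispatch with a hypothetical worker:

import torch.multiprocessing as mp

def _demo_worker(local_rank, ngpus_per_node, args):
    # local_rank is injected by mp.spawn and plays the role of `gpu` in main_worker.
    print(f'spawned process {local_rank} of {ngpus_per_node}')

# mp.spawn(_demo_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))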
Example 4
    ]
    for v in sys.argv[1:]:
        if '=' in v:
            name, value = v.split('=', 1)
            if name == '--tl_opts':
                argv.append(name)
                argv.extend(value.split(' '))
            else:
                argv.extend([name, value])
        else:
            argv.append(v)
    sys.argv.clear()
    sys.argv.extend(argv)

    sys.argv[
        sys.argv.index('--tl_outdir') +
        1] = f"{sys.argv[sys.argv.index('--tl_outdir') + 1]}-{time_str}_{tmp_args.number:02d}"
    shutil.rmtree(sys.argv[sys.argv.index('--tl_outdir') + 1],
                  ignore_errors=True)

    print(f"sys.argv processed: ")
    pprint.pprint(sys.argv)
    parser = update_parser_defaults_from_yaml(parser=parser,
                                              use_cfg_as_args=True)
    logger = logging.getLogger('tl')

    args, _ = parser.parse_known_args()
    global_cfg.merge_from_dict(vars(args))
    print(get_dict_str(global_cfg))
    main()
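The loop above turns a single --tl_opts argument into separate argv tokens so argparse can treat it as a variable-length option, and then suffixes --tl_outdir with the time string and run number. A hypothetical before/after (values are illustrative):

# Before:
#   ['train.py', '--tl_config_file=cfg.yaml', '--tl_opts=lr 0.03 epochs 200', '--tl_outdir=results/moco']
# After the rewrite above (assuming time_str='20230101-000000' and --number 0):
#   ['train.py', '--tl_config_file', 'cfg.yaml',
#    '--tl_opts', 'lr', '0.03', 'epochs', '200',
#    '--tl_outdir', 'results/moco-20230101-000000_00']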
Example 5
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.gpu == 0:
        update_parser_defaults_from_yaml(parser)
        global_cfg.merge_from_dict(vars(args))
        modelarts_utils.setup_tl_outdir_obs(global_cfg)
        modelarts_utils.modelarts_sync_results_dir(global_cfg, join=True)

    logger = logging.getLogger('tl')
    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
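            # e.g. on node 1 of a 2-node job with 8 GPUs per node (args.rank == 1
            # before this line), the local processes get global ranks 8..15.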
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(models.__dict__[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t, args.mlp)
    logger.info(model)

    modelarts_utils.modelarts_sync_results_dir(global_cfg,
                                               join=True,
                                               is_main_process=(args.gpu == 0))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
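            # e.g. --batch-size 256 and --workers 32 on an 8-GPU node give each
            # process a per-GPU batch of 256 // 8 = 32 and ceil(32 / 8) = 4 workers.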
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    train_dataset = datasets.ImageFolder(
        traindir,
        moco.loader.TwoCropsTransform(transforms.Compose(augmentation)))
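    # TwoCropsTransform applies the same augmentation pipeline twice to each image,
    # producing the query/key pair of views that MoCo contrasts.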

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)
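    # drop_last=True keeps every batch the same size; MoCo's queue update assumes the
    # queue length K is divisible by the per-step batch size.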

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=f'{args.tl_ckptdir}/checkpoint_{epoch:04d}.pth.tar')
            modelarts_utils.modelarts_sync_results_dir(
                global_cfg, join=False, is_main_process=(args.gpu == 0))