Example #1
def make_data_loader(cfg, is_train=True, max_iter=None, start_iter=0):
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(
        cfg.DATASET_DIR,
        dataset_list, transform=train_transform,
        target_transform=target_transform, is_train=is_train)

    shuffle = is_train
    data_loaders = []

    for dataset in datasets:
        if shuffle:
            sampler = torch.utils.data.RandomSampler(dataset)
        else:
            sampler = torch.utils.data.sampler.SequentialSampler(dataset)

        batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler=sampler, batch_size=batch_size, drop_last=is_train)
        if max_iter is not None:
            # wrap so the loader yields exactly max_iter batches, resuming from start_iter
            batch_sampler = samplers.IterationBasedBatchSampler(
                batch_sampler, num_iterations=max_iter, start_iter=start_iter)

        data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler,
                                 pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train))
        data_loaders.append(data_loader)

    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
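Example #1 hinges on the batch_sampler argument: a BatchSampler groups indices from the base sampler, and samplers.IterationBasedBatchSampler rewraps it so training runs for a fixed number of iterations instead of whole epochs. Below is a minimal sketch of such a wrapper, written as a plain iterable for illustration; the SSD repo's actual implementation may differ in details.

class IterationBasedBatchSamplerSketch:
    """Re-iterate a BatchSampler until num_iterations batches have been yielded."""

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration < self.num_iterations:
            # cycle the underlying batch sampler as many times as needed
            for batch in self.batch_sampler:
                iteration += 1
                if iteration > self.num_iterations:
                    break
                yield batch

    def __len__(self):
        return self.num_iterations

Passing this object as batch_sampler to a DataLoader makes the loader itself stop after exactly num_iterations batches, which is why the training loops above can count iterations instead of epochs.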
Example #2
def make_data_loader(cfg,
                     is_train=True,
                     distributed=False,
                     max_iter=None,
                     start_iter=0):
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    print('Datasets....')
    print(dataset_list)
    # 1. First, build the dataset(s)

    datasets = build_dataset(dataset_list,
                             transform=train_transform,
                             target_transform=target_transform,
                             is_train=is_train)

    shuffle = is_train or distributed

    data_loaders = []

    for dataset in datasets:
        if distributed:
            sampler = samplers.DistributedSampler(dataset, shuffle=shuffle)
        elif shuffle:
            sampler = torch.utils.data.RandomSampler(dataset)
        else:
            sampler = torch.utils.data.sampler.SequentialSampler(dataset)

        batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE
        # the batch size here is 32, nice!
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler=sampler, batch_size=batch_size, drop_last=False)
        if max_iter is not None:
            batch_sampler = samplers.IterationBasedBatchSampler(
                batch_sampler, num_iterations=max_iter, start_iter=start_iter)

        # 2. Then build the data loader, specifying the number of CPU workers and the batch_sampler
        data_loader = DataLoader(dataset,
                                 num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                                 batch_sampler=batch_sampler,
                                 pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                                 collate_fn=BatchCollator(is_train))
        data_loaders.append(data_loader)

    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
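The distributed branch in Example #2 swaps the base sampler for samplers.DistributedSampler, so each process reads its own disjoint shard of the dataset. A hedged sketch of the same idea using the stock torch.utils.data.distributed.DistributedSampler (the repo ships its own variant; this assumes torch.distributed has already been initialized):

from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

def make_sharded_loader(dataset, batch_size, epoch):
    # each of the world_size processes receives roughly
    # len(dataset) / world_size indices per epoch
    sampler = DistributedSampler(dataset, shuffle=True)
    sampler.set_epoch(epoch)  # reseeds the shuffle every epoch
    return DataLoader(dataset, batch_size=batch_size, sampler=sampler)

Note that sampler and batch_sampler are mutually exclusive DataLoader arguments, which is why Example #2 wraps the distributed sampler in a BatchSampler before handing it to the loader.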
Example #3
def active_train(cfg, args):
    logger = logging.getLogger("SSD.trainer")
    raw_model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    raw_model.to(device)

    # linear scaling rule: scale the base LR up and the LR milestones down
    # by the number of GPUs
    lr = cfg.SOLVER.LR * args.num_gpus
    optimizer = make_optimizer(cfg, raw_model, lr)

    milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
    scheduler = make_lr_scheduler(cfg, optimizer, milestones)

    arguments = {"iteration": 0}

    save_to_disk = dist_util.get_rank() == 0
    checkpointer = CheckPointer(raw_model, optimizer, scheduler,
                                args.model_dir, save_to_disk, logger)

    max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus

    is_train = True
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list,
                             transform=train_transform,
                             target_transform=target_transform,
                             is_train=is_train)

    logger.info('Creating query loader...')
    query_loader = QueryLoader(datasets[0], args, cfg)

    logger.info('Creating AL model...')
    strategy = get_strategy(args.strategy)
    model = ALModel(raw_model, strategy, optimizer, device, scheduler,
                    arguments, args, checkpointer, cfg)

    logger.info(f'Training on initial data with size {args.init_size}...')
    n_bbox = query_loader.len_annotations()
    t1 = time.time()
    model.fit(query_loader.get_labeled_loader())
    init_time = time.time() - t1
    logger.info('Scoring after initial training...')
    score = model.score()
    logger.info(f'SCORE : {score:.4f}')

    fields = [
        args.strategy, {}, 0, score, init_time, 0, init_time,
        len(query_loader), n_bbox
    ]
    save_to_csv(args.filename, fields)

    for step in range(args.query_step):
        logger.info(f'STEP NUMBER {step}')
        logger.info('Querying assets to label')
        t1 = time.time()
        query_idx = model.query(
            unlabeled_loader=query_loader.get_unlabeled_loader(),
            cfg=cfg,
            args=args,
            step=step,
            n_instances=args.query_size,
            length_ds=len(datasets[0]))
        logger.info('Adding labeled samples to train dataset')
        query_loader.add_to_labeled(query_idx, step + 1)
        t2 = time.time()
        logger.info('Fitting with new data...')
        model.fit(query_loader.get_labeled_loader())
        total_time = time.time() - t1
        train_time = time.time() - t2
        active_time = total_time - train_time
        logger.info('Scoring model...')
        score = model.score()
        n_bbox = query_loader.len_annotations()
        fields = [
            args.strategy, {}, step + 1, score, train_time, active_time,
            total_time,
            len(query_loader), n_bbox
        ]
        save_to_csv(args.filename, fields)
        logger.info(f'SCORE : {score:.4f}')

    return model.model
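Example #3 is the standard active-learning loop: query the most informative unlabeled samples, move them into the labeled pool, refit, and score. The strategy behind get_strategy() is not shown on this page; a hedged sketch of the simplest one, least-confidence uncertainty sampling, follows (the confidence array and its layout are assumptions for illustration):

import numpy as np

def least_confidence_query(confidences, n_instances):
    """confidences: one max-class probability per unlabeled sample.
    Returns the indices of the n_instances least confident samples."""
    order = np.argsort(confidences)  # ascending: least confident first
    return order[:n_instances]

# least_confidence_query(np.array([0.9, 0.3, 0.7, 0.2]), 2) -> array([3, 1])

The returned indices play the role of query_idx above: query_loader.add_to_labeled() would then move those samples out of the unlabeled pool before the next fit.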
Example #4
    def _read_image(self, image_id):
        # load a VOC-style JPEG by id and return it as an RGB numpy array
        image_file = os.path.join(self.data_dir, "JPEGImages",
                                  "%s.jpg" % image_id)
        image = Image.open(image_file).convert("RGB")
        image = np.array(image)
        return image


if __name__ == '__main__':
    from ssd.config import cfg
    from ssd.data.transforms import build_transforms, build_target_transform
    from ssd.data.datasets import build_dataset

    is_train = True

    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list,
                             transform=train_transform,
                             target_transform=target_transform,
                             is_train=is_train)

    image, targets, index = datasets[0][200]
    boxes = targets['boxes']
    labels = targets['labels']
    print(image.shape)
    print(boxes.shape)
    print(labels.shape)
    print(index)
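For quick checks outside the dataset class, _read_image from Example #4 can be lifted into a standalone helper. The VOC-style JPEGImages layout comes from the method itself, while the concrete data_dir in the usage comment is a placeholder:

import os
import numpy as np
from PIL import Image

def read_image(data_dir, image_id):
    # same path convention as the _read_image method above
    image_file = os.path.join(data_dir, "JPEGImages", "%s.jpg" % image_id)
    return np.array(Image.open(image_file).convert("RGB"))

# read_image("/path/to/VOCdevkit/VOC2007", "000005")  # -> (H, W, 3) uint8 array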