def make_data_loader(cfg, is_train=True, max_iter=None, start_iter=0):
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(cfg.DATASET_DIR, dataset_list, transform=train_transform,
                             target_transform=target_transform, is_train=is_train)

    shuffle = is_train

    data_loaders = []
    for dataset in datasets:
        if shuffle:
            sampler = torch.utils.data.RandomSampler(dataset)
        else:
            sampler = torch.utils.data.sampler.SequentialSampler(dataset)

        batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler=sampler, batch_size=batch_size, drop_last=is_train)
        if max_iter is not None:
            batch_sampler = samplers.IterationBasedBatchSampler(
                batch_sampler, num_iterations=max_iter, start_iter=start_iter)

        data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                                 batch_sampler=batch_sampler,
                                 pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                                 collate_fn=BatchCollator(is_train))
        data_loaders.append(data_loader)

    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
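
# Usage sketch (not from the repo): how this loader is typically driven from a
# train/eval script. The config file path is hypothetical, and `cfg` is assumed
# to be the project's yacs config object with the fields the function reads.
from ssd.config import cfg

cfg.merge_from_file("configs/ssd300_voc0712.yaml")  # hypothetical config file
cfg.freeze()

# Training: one concatenated loader that yields exactly MAX_ITER batches.
train_loader = make_data_loader(cfg, is_train=True, max_iter=cfg.SOLVER.MAX_ITER)

# Evaluation: a list of loaders, one per dataset in cfg.DATASETS.TEST.
test_loaders = make_data_loader(cfg, is_train=False)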
def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0):
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    print('Datasets...')
    print(dataset_list)
    # 1. First, build the datasets.
    datasets = build_dataset(dataset_list, transform=train_transform,
                             target_transform=target_transform, is_train=is_train)

    shuffle = is_train or distributed

    data_loaders = []
    for dataset in datasets:
        if distributed:
            sampler = samplers.DistributedSampler(dataset, shuffle=shuffle)
        elif shuffle:
            sampler = torch.utils.data.RandomSampler(dataset)
        else:
            sampler = torch.utils.data.sampler.SequentialSampler(dataset)

        batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATch_SIZE if False else cfg.TEST.BATCH_SIZE  # the batch size here is 32 (from the config)
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler=sampler, batch_size=batch_size, drop_last=False)
        if max_iter is not None:
            batch_sampler = samplers.IterationBasedBatchSampler(
                batch_sampler, num_iterations=max_iter, start_iter=start_iter)

        # 2. Then build the data loader, specifying the number of CPU worker
        #    processes and the batch_sampler.
        data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS,
                                 batch_sampler=batch_sampler,
                                 pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
                                 collate_fn=BatchCollator(is_train))
        data_loaders.append(data_loader)

    if is_train:
        # during training, a single (possibly concatenated) data_loader is returned
        assert len(data_loaders) == 1
        return data_loaders[0]
    return data_loaders
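
# Both versions above wrap the epoch-based BatchSampler in
# samplers.IterationBasedBatchSampler. A minimal sketch of how such a sampler
# is commonly implemented (after maskrcnn-benchmark); the project's actual
# version may differ in detail:
from torch.utils.data.sampler import BatchSampler


class IterationBasedBatchSampler(BatchSampler):
    """Resample from the wrapped BatchSampler until num_iterations batches
    have been produced, so training length is set in iterations, not epochs."""

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        iteration = self.start_iter
        while iteration <= self.num_iterations:
            # A DistributedSampler reshuffles per pass via set_epoch().
            if hasattr(self.batch_sampler.sampler, "set_epoch"):
                self.batch_sampler.sampler.set_epoch(iteration)
            for batch in self.batch_sampler:
                iteration += 1
                if iteration > self.num_iterations:
                    break
                yield batch

    def __len__(self):
        return self.num_iterations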
def active_train(cfg, args):
    logger = logging.getLogger("SSD.trainer")

    # Build the detector and move it to the configured device.
    raw_model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    raw_model.to(device)

    # Scale the learning rate and LR-step milestones by the number of GPUs.
    lr = cfg.SOLVER.LR * args.num_gpus
    optimizer = make_optimizer(cfg, raw_model, lr)
    milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
    scheduler = make_lr_scheduler(cfg, optimizer, milestones)

    arguments = {"iteration": 0}
    save_to_disk = dist_util.get_rank() == 0
    checkpointer = CheckPointer(raw_model, optimizer, scheduler, args.model_dir,
                                save_to_disk, logger)
    max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus

    # Build the training dataset (input transforms + target encoding).
    is_train = True
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list, transform=train_transform,
                             target_transform=target_transform, is_train=is_train)

    logger.info('Creating query loader...')
    query_loader = QueryLoader(datasets[0], args, cfg)

    logger.info('Creating AL model...')
    strategy = get_strategy(args.strategy)
    model = ALModel(raw_model, strategy, optimizer, device, scheduler, arguments,
                    args, checkpointer, cfg)

    # Initial training on the seed labeled pool.
    logger.info(f'Training on initial data with size {args.init_size}...')
    n_bbox = query_loader.len_annotations()
    t1 = time.time()
    model.fit(query_loader.get_labeled_loader())
    init_time = time.time() - t1

    logger.info('Scoring after initial training...')
    score = model.score()
    logger.info(f'SCORE : {score:.4f}')
    fields = [args.strategy, {}, 0, score, init_time, 0, init_time,
              len(query_loader), n_bbox]
    save_to_csv(args.filename, fields)

    # Active learning loop: query, label, retrain, evaluate.
    for step in range(args.query_step):
        logger.info(f'STEP NUMBER {step}')
        logger.info('Querying assets to label')
        t1 = time.time()
        query_idx = model.query(unlabeled_loader=query_loader.get_unlabeled_loader(),
                                cfg=cfg, args=args, step=step,
                                n_instances=args.query_size,
                                length_ds=len(datasets[0]))

        logger.info('Adding labeled samples to train dataset')
        query_loader.add_to_labeled(query_idx, step + 1)
        t2 = time.time()

        logger.info('Fitting with new data...')
        model.fit(query_loader.get_labeled_loader())
        total_time = time.time() - t1
        train_time = time.time() - t2
        active_time = total_time - train_time

        logger.info('Scoring model...')
        score = model.score()
        n_bbox = query_loader.len_annotations()
        fields = [args.strategy, {}, step + 1, score, train_time, active_time,
                  total_time, len(query_loader), n_bbox]
        save_to_csv(args.filename, fields)
        logger.info(f'SCORE : {score:.4f}')

    return model.model
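
# For reference, a hedged sketch of the argparse namespace active_train
# expects. The attribute names are read directly from the function body above;
# the flag spellings and default values are illustrative assumptions.
import argparse

from ssd.config import cfg

parser = argparse.ArgumentParser(description="Active learning loop for SSD")
parser.add_argument("--num-gpus", dest="num_gpus", type=int, default=1)
parser.add_argument("--model-dir", dest="model_dir", default="outputs")
parser.add_argument("--strategy", default="random")  # key passed to get_strategy
parser.add_argument("--init-size", dest="init_size", type=int, default=1000)
parser.add_argument("--query-step", dest="query_step", type=int, default=10)   # number of AL rounds
parser.add_argument("--query-size", dest="query_size", type=int, default=500)  # assets queried per round
parser.add_argument("--filename", default="results.csv")  # CSV log written by save_to_csv
args = parser.parse_args()

trained_model = active_train(cfg, args)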
def _read_image(self, image_id):
    # Method of the VOC dataset class; requires os, numpy as np, and
    # PIL.Image at module scope.
    image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id)
    image = Image.open(image_file).convert("RGB")
    image = np.array(image)
    return image


if __name__ == '__main__':
    from ssd.config import cfg
    from ssd.data.transforms import build_transforms, build_target_transform
    from ssd.data.datasets import build_dataset

    is_train = True
    train_transform = build_transforms(cfg, is_train=is_train)
    target_transform = build_target_transform(cfg) if is_train else None
    dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
    datasets = build_dataset(dataset_list, transform=train_transform,
                            target_transform=target_transform, is_train=is_train)

    # Fetch one sample: the transformed image, its encoded targets, and its index.
    image, targets, index = datasets[0][200]
    boxes = targets['boxes']
    labels = targets['labels']
    print(image.shape)
    print(boxes.shape)
    print(labels.shape)
    print(index)
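
    # To pull batches rather than single samples from the same dataset, it can
    # be wrapped in a plain DataLoader with the project's collator. A minimal
    # sketch, assuming BatchCollator lives in ssd.data.build and returns
    # (images, targets, image_ids) as the loader code above suggests.
    from torch.utils.data import DataLoader
    from ssd.data.build import BatchCollator  # assumed module path

    loader = DataLoader(datasets[0], batch_size=4, shuffle=True,
                        collate_fn=BatchCollator(is_train=True))
    images, targets, image_ids = next(iter(loader))
    print(images.shape)            # batch of transformed images, e.g. (4, 3, 300, 300)
    print(targets['boxes'].shape)  # per-prior regression targets for the batch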