def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    # Alternative: bucket by aspect ratio so similarly shaped images share a batch.
    # train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
    #     train_dataset.get_im_aspect_ratio(), batch_size,
    #     num_parts=hvd.size() if args.horovod else 1,
    #     part_index=hvd.rank() if args.horovod else 0, shuffle=True)
    # SplitSampler takes the dataset length and yields single indices,
    # so it is passed as `sampler` together with `batch_size`.
    train_sampler = gcv.nn.sampler.SplitSampler(
        len(train_dataset),
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0)
    train_loader = mx.gluon.data.DataLoader(
        train_dataset.transform(
            train_transform(net.short, net.max_size, net, ashape=net.ashape,
                            multi_stage=args.use_fpn)),
        batch_size=batch_size, sampler=train_sampler,
        batchify_fn=train_bfn, num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=args.num_workers)
    return train_loader, val_loader
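
# Sketch of the sampler semantics assumed above (requires gluoncv).
# SplitSampler shards `length` indices into `num_parts` and yields single
# shuffled indices from shard `part_index`, so it pairs with DataLoader's
# `sampler=` plus `batch_size=`; SplitSortedBucketSampler yields whole
# batches and pairs with `batch_sampler=` instead.
import gluoncv as gcv

idx_sampler = gcv.nn.sampler.SplitSampler(10, num_parts=2, part_index=0)
print(list(idx_sampler))  # five shuffled indices from this worker's shard
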
def get_faster_rcnn_dataloader(net, train_dataset, val_dataset, train_transform,
                               val_transform, batch_size, num_shards, args):
    """Get faster rcnn dataloader."""
    if (not args.final_fit) and (not val_dataset):
        train_dataset, val_dataset = _train_val_split(train_dataset, args.split_ratio)
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio, batch_size, num_parts=1, part_index=0, shuffle=True)
    train_loader = gluon.data.DataLoader(
        train_dataset.transform(
            train_transform(net.short, net.max_size, net, ashape=net.ashape,
                            multi_stage=True)),
        batch_sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = None
    if val_dataset:
        val_loader = gluon.data.DataLoader(
            val_dataset.transform(val_transform(short, net.max_size)),
            num_shards, False, batchify_fn=val_bfn, last_batch='keep',
            num_workers=args.num_workers)
    args.num_samples = len(train_dataset)
    return train_loader, val_loader
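
# `_train_val_split` is referenced above but not defined in this snippet.
# A minimal sketch of what such a helper might look like (the eager
# materialization and the split semantics are assumptions, not the original
# implementation):
import random
from mxnet.gluon.data import SimpleDataset

def _train_val_split(train_dataset, split_ratio=0.2):
    # hold out `split_ratio` of the samples for validation
    indices = list(range(len(train_dataset)))
    random.shuffle(indices)
    n_val = int(len(indices) * split_ratio)
    val = SimpleDataset([train_dataset[i] for i in indices[:n_val]])
    train = SimpleDataset([train_dataset[i] for i in indices[n_val:]])
    return train, val
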
def _get_dataloader(net, test_dataset, data_shape, batch_size, num_workers,
                    num_devices, args):
    """Get dataloader."""
    if args.meta_arch == 'yolo3':
        width, height = data_shape, data_shape
        val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
        test_loader = gluon.data.DataLoader(
            test_dataset.transform(YOLO3DefaultValTransform(width, height)),
            batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',
            num_workers=num_workers)
        return test_loader
    elif args.meta_arch == 'faster_rcnn':
        # Faster R-CNN dataloader: Append-based batchify, 1 sample per device.
        test_bfn = Tuple(*[Append() for _ in range(3)])
        short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
        test_loader = gluon.data.DataLoader(
            test_dataset.transform(FasterRCNNDefaultValTransform(short, net.max_size)),
            num_devices, False, batchify_fn=test_bfn, last_batch='keep',
            num_workers=args.num_workers)
        return test_loader
    else:
        raise NotImplementedError('%s not implemented.' % args.meta_arch)
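
# Usage sketch for the dispatcher above (names like `net`, `test_dataset` and
# `args` are assumed to exist as in the surrounding training script; the
# numeric values are illustrative):
test_loader = _get_dataloader(net, test_dataset, data_shape=416, batch_size=8,
                              num_workers=4, num_devices=1, args=args)
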
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    # Validation-only variant: no train loader is built.
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=args.num_workers)
    return None, val_loader
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio, batch_size,
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0,
        shuffle=True)
    # ashape: the predefined anchor size; together with multi_stage it is used
    # to precompute anchors inside the train transform.
    train_loader = mx.gluon.data.DataLoader(
        train_dataset.transform(
            train_transform(net.short, net.max_size, net, ashape=net.ashape,
                            multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=args.num_workers)
    # Each item of the transformed val dataset returns
    # img, bbox.astype('float32'), mx.nd.array([im_scale]):
    #   bbox: x1, y1, x2, y2, class_id
    #   img: shorter side <= short, longer side <= net.max_size
    # Note: Tuple is gluoncv.data.batchify.Tuple, not Python's built-in tuple.
    # Append() keeps every sample as its own ndarray, so sample sizes may
    # differ and the returned batch is a list. val_bfn uses three Append()s,
    # one per field of the dataset item.
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=args.num_workers)
    return train_loader, val_loader
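
# A self-contained sketch of the Append/Tuple semantics described above
# (dummy shapes are illustrative; requires mxnet and gluoncv): each field of
# a batch is a list with one NDArray per sample instead of a stacked tensor.
import mxnet as mx
from mxnet.gluon.data import SimpleDataset, DataLoader
from gluoncv.data.batchify import Tuple, Append

dummy = SimpleDataset([
    (mx.nd.zeros((300, 400, 3)), mx.nd.zeros((2, 5))),  # 2 boxes
    (mx.nd.zeros((240, 320, 3)), mx.nd.zeros((3, 5))),  # 3 boxes
])
loader = DataLoader(dummy, batch_size=2, batchify_fn=Tuple(Append(), Append()))
for imgs, labels in loader:
    # Append keeps each sample separate, adding only a leading batch axis of 1
    print([im.shape for im in imgs], [lb.shape for lb in labels])
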
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    if hasattr(train_dataset, 'get_im_aspect_ratio'):
        im_aspect_ratio = train_dataset.get_im_aspect_ratio()
    else:
        im_aspect_ratio = [1.] * len(train_dataset)
    # Pick the data shard for this worker: Horovod, perseus kvstore, or single process.
    if args.horovod:
        num_parts = hvd.size()
        part_index = hvd.rank()
    elif 'perseus' in args.kv_store:
        num_parts = kv.num_workers
        part_index = kv.rank
    else:
        num_parts = 1
        part_index = 0
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio, batch_size, num_parts=num_parts,
        part_index=part_index, shuffle=True)
    train_loader = mx.gluon.data.DataLoader(
        train_dataset.transform(
            train_transform(net.short, net.max_size, net, ashape=net.ashape,
                            multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=args.num_workers)
    return train_loader, val_loader
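
# Quick sketch of what SplitSortedBucketSampler does with the aspect ratios
# (toy values; exact grouping is a gluoncv implementation detail): samples
# are sorted by aspect ratio so each batch holds similarly shaped images,
# which minimizes the padding FasterRCNNTrainBatchify has to add.
import gluoncv as gcv

toy_ratios = [1.33, 0.75, 1.0, 1.5, 0.8, 1.2]
bucket_sampler = gcv.nn.sampler.SplitSortedBucketSampler(toy_ratios, batch_size=2)
for batch_indices in bucket_sampler:
    print(batch_indices)  # each batch groups indices with similar ratios
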
# (mxnet uses BCHW format) before they are fed into neural networks.
#
# A handy DataLoader would be very convenient for us to apply different
# transforms and aggregate data into mini-batches.
#
# Because Faster-RCNN handles raw images with various aspect ratios and
# various shapes, we provide a :py:class:`gluoncv.data.batchify.Append`,
# which neither stacks nor pads images, but instead returns lists. This way,
# the returned image tensors and labels each keep their own shapes,
# independent of the rest of the batch.
from gluoncv.data.batchify import Tuple, Append
from mxnet.gluon.data import DataLoader

batch_size = 2  # for tutorial, we use smaller batch-size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

# behavior of batchify_fn: return lists of images and lists of labels
batchify_fn = Tuple(Append(), Append())
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size,
                          shuffle=True, batchify_fn=batchify_fn,
                          last_batch='rollover', num_workers=num_workers)
val_loader = DataLoader(val_dataset.transform(val_transform), batch_size,
                        shuffle=False, batchify_fn=batchify_fn,
                        last_batch='keep', num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    print('data 0:', batch[0][0].shape, 'label 0:', batch[1][0].shape)
    print('data 1:', batch[0][1].shape, 'label 1:', batch[1][1].shape)
import glob

import pandas as pd
import mxnet.gluon.data as gdata
from mxnet import image
from mxnet.gluon.data import DataLoader
from gluoncv.data.batchify import Tuple, Append


class DensemapDataset(gdata.Dataset):
    # Reconstructed class header: the original snippet begins inside
    # __init__; the default glob patterns below are placeholders.
    def __init__(self, imgPath='./images/*.jpg', csvPath='./labels/*.csv'):
        self._imglist = [file for file in glob.iglob(imgPath)]
        self._csvPath = [file for file in glob.iglob(csvPath)]
        self._len = len(self._imglist)

    def __getitem__(self, idx):
        img = image.imread(self._imglist[idx])
        label = pd.read_csv(self._csvPath[idx])
        return (img, label)

    def __len__(self):
        return self._len


if __name__ == "__main__":
    batch_size = 2
    batchify_fn = Tuple(Append(), Append())
    train_dataset = DensemapDataset()
    im = train_dataset[0]

    def train_transform(*trans_data):
        img = trans_data[0]
        aug = gdata.vision.transforms.RandomFlipLeftRight()
        return (aug(img), trans_data[1])

    train_loader = DataLoader(train_dataset.transform(train_transform),
                              batch_size=2, shuffle=True, batchify_fn=batchify_fn)
    for ib, batch in enumerate(train_loader):
        # batch[0] is X, batch[1] is y
        # batch[0][0] is the 0th X
        print(ib, batch[0][0].shape)
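
# One caveat with the dataset above: pd.read_csv returns a DataFrame, while
# the batchify fns expect array-like data. A possible adjustment (assuming
# the CSV holds only numeric values; this helper is not in the original) is
# to convert inside the transform:
import mxnet as mx

def train_transform_arr(*trans_data):
    img, label = trans_data
    aug = gdata.vision.transforms.RandomFlipLeftRight()
    return aug(img), mx.nd.array(label.values.astype('float32'))
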
                      class_names=train_dataset.classes, ax=ax)
plt.show()

##########################################################
# Data Loader
# -----------
# Data loader is identical to Faster R-CNN's, except for the extra mask
# input and output.
from gluoncv.data.batchify import Tuple, Append, MaskRCNNTrainBatchify
from mxnet.gluon.data import DataLoader

batch_size = 2  # for tutorial, we use smaller batch-size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

train_bfn = Tuple(*[Append() for _ in range(3)])
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size,
                          shuffle=True, batchify_fn=train_bfn,
                          last_batch='rollover', num_workers=num_workers)
val_bfn = Tuple(*[Append() for _ in range(2)])
val_loader = DataLoader(val_dataset.transform(val_transform), batch_size,
                        shuffle=False, batchify_fn=val_bfn,
                        last_batch='keep', num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    # each field in the batch is a list with one entry per sample
    print('data 0:', batch[0][0].shape, 'label 0:', batch[1][0].shape)
def get_dataloader(net, train_dataset, val_dataset, train_transform, val_transform,
                   batch_size, num_shards, args):
    """Get dataloader."""
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        train_dataset.get_im_aspect_ratio(), batch_size,
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0,
        shuffle=True)
    train_loader = mx.gluon.data.DataLoader(
        train_dataset.transform(
            train_transform(net.short, net.max_size, net, ashape=net.ashape,
                            multi_stage=args.use_fpn)),
        batch_sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=args.num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=args.num_workers)
    return train_loader, val_loader


def save_params(net, logger, best_map, current_map, epoch, save_interval, prefix):
    current_map = float(current_map)
    if current_map > best_map[0]:
        logger.info('[Epoch {}] mAP {} higher than current best {}, saving to {}'.format(
            epoch, current_map, best_map[0], '{:s}_best.params'.format(prefix)))
        best_map[0] = current_map
        net.save_parameters('{:s}_best.params'.format(prefix))
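
# Why best_map is a single-element list: save_params mutates it in place so
# the best score persists across epochs. A usage sketch (validate() and the
# other loop names are hypothetical stand-ins for the training script's own):
best_map = [0.]
for epoch in range(args.start_epoch, args.epochs):
    # ... train one epoch ...
    current_map = validate(net, val_loader)  # hypothetical eval returning mAP
    save_params(net, logger, best_map, current_map, epoch,
                args.save_interval, args.save_prefix)
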
                      class_names=train_dataset.classes, ax=ax)
plt.show()

##########################################################
# Data Loader
# -----------
# Data loader is identical to Faster R-CNN's, except for the extra mask
# input and output.
from gluoncv.data.batchify import Tuple, Append
from mxnet.gluon.data import DataLoader

batch_size = 2  # for tutorial, we use smaller batch-size
num_workers = 0  # you can make it larger (if your CPU has more cores) to accelerate data loading

train_bfn = Tuple(*[Append() for _ in range(3)])
train_loader = DataLoader(train_dataset.transform(train_transform), batch_size,
                          shuffle=True, batchify_fn=train_bfn,
                          last_batch='rollover', num_workers=num_workers)
val_bfn = Tuple(*[Append() for _ in range(2)])
val_loader = DataLoader(val_dataset.transform(val_transform), batch_size,
                        shuffle=False, batchify_fn=val_bfn,
                        last_batch='keep', num_workers=num_workers)

for ib, batch in enumerate(train_loader):
    if ib > 3:
        break
    # each field in the batch is a list with one entry per sample
    print('data 0:', batch[0][0].shape, 'label 0:', batch[1][0].shape)
def get_voc_iterator(rank, num_workers, net, num_shards):
    data_dir = "data-%d" % rank
    try:
        s3_client = boto3.client('s3')
        for file in ['VOCtrainval_06-Nov-2007.tar', 'VOCtest_06-Nov-2007.tar',
                     'VOCtrainval_11-May-2012.tar']:
            s3_client.download_file(args.s3bucket, f'voc_tars/{file}',
                                    f'/opt/ml/code/{file}')
            # extract each downloaded tar into this worker's data dir so
            # VOCdevkit lands under data-<rank> (target path assumed)
            with tarfile.open(f'/opt/ml/code/{file}') as tar:
                tar.extractall(path=f'/opt/ml/code/data-{rank}')
    except Exception:
        print('downloading from source')
        download_voc(data_dir)

    input_shape = (1, 256, 256, 3)
    batch_size = args.batch_size
    # might want to replace with mx.io.ImageDetRecordIter; this requires data
    # in RecordIO format
    # train_iter = mx.io.MNISTIter(
    #     image="%s/train-images-idx3-ubyte" % data_dir,
    #     label="%s/train-labels-idx1-ubyte" % data_dir,
    #     input_shape=input_shape,
    #     batch_size=batch_size,
    #     shuffle=True,
    #     flat=False,
    #     num_parts=hvd.size(),
    #     part_index=hvd.rank()
    # )
    train_dataset = gdata.VOCDetection(
        root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
        splits=[(2007, 'trainval'), (2012, 'trainval')])
    val_dataset = gdata.VOCDetection(
        root=f'/opt/ml/code/data-{rank}/VOCdevkit/',
        splits=[(2007, 'test')])
    val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
    im_aspect_ratio = [1.] * len(train_dataset)
    train_bfn = FasterRCNNTrainBatchify(net)
    train_sampler = gluoncv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio, batch_size,
        num_parts=hvd.size() if args.horovod else 1,
        part_index=hvd.rank() if args.horovod else 0,
        shuffle=True)
    # had an issue with multi_stage=True
    train_iter = mx.gluon.data.DataLoader(
        train_dataset.transform(
            FasterRCNNDefaultTrainTransform(net.short, net.max_size, net,
                                            ashape=net.ashape, multi_stage=False)),
        batch_sampler=train_sampler, batchify_fn=train_bfn,
        num_workers=num_workers)
    val_bfn = Tuple(*[Append() for _ in range(3)])
    short = net.short[-1] if isinstance(net.short, (tuple, list)) else net.short
    # validation uses 1 sample per device
    val_iter = mx.gluon.data.DataLoader(
        val_dataset.transform(FasterRCNNDefaultValTransform(short, net.max_size)),
        num_shards, False, batchify_fn=val_bfn, last_batch='keep',
        num_workers=num_workers)
    return train_iter, val_iter
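
# Usage sketch with Horovod (assumes `net` is already constructed and one GPU
# per process, hence num_shards=1; hvd.init() must run before rank/size):
import horovod.mxnet as hvd

hvd.init()
train_iter, val_iter = get_voc_iterator(hvd.rank(), num_workers=4, net=net,
                                        num_shards=1)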