def getdata(path, isTrain):
    # Load serialized datapoints from LMDB; shuffle only for training.
    ds = LMDBSerializer.load(path, shuffle=isTrain)
    # Graph Benchmark (synthetic datapoints for benchmarking the input pipeline):
    # ds = FakeData([[10, 10], [10, 10], [10, 10], [10, 10], [10], [10], [10, 10], [1], [1], [1]], 1000, random=False,
    #               dtype=['int32', 'int32', 'int32', 'int32', 'int32', 'int32', 'int32', 'int32', 'int32', 'int32'],
    #               domain=[(0, 100), (0, 120), (0, 120), (0, 1), (0, 100), (0, 100), (0, 100), (0, 52), (0, 115), (0, 115)])
    ds = getbatch(ds, 32, isTrain)
    if isTrain:
        # Run the dataflow in 4 parallel processes communicating over ZMQ.
        ds = MultiProcessRunnerZMQ(ds, 4)
    return ds
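
# Usage sketch (assumptions: `path` points at an LMDB written by
# LMDBSerializer.save and `getbatch` returns a batched tensorpack DataFlow;
# the path below is a placeholder). A DataFlow must have reset_state() called
# once before it is iterated.
# train_ds = getdata('/path/to/train.lmdb', isTrain=True)  # hypothetical path
# train_ds.reset_state()
# for dp in train_ds:
#     pass  # each dp is one batch of components; feed it to the training step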

def compute_mean_std(db, fname):
    # Accumulate running per-dimension mean/std over every frame in the LMDB.
    ds = LMDBSerializer.load(db, shuffle=False)
    ds.reset_state()
    o = OnlineMoments()
    for dp in get_tqdm(ds):
        feat = dp[0]  # len x dim
        for f in feat:
            o.feed(f)
    logger.info("Writing to {} ...".format(fname))
    with open(fname, 'wb') as f:
        f.write(serialize.dumps([o.mean, o.std]))
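
# Companion sketch (assumption: `fname` was written by compute_mean_std above).
# tensorpack's serialize.loads is the inverse of serialize.dumps, so the stats
# can be read back like this:
def load_mean_std(fname):
    with open(fname, 'rb') as f:
        mean, std = serialize.loads(f.read())
    return mean, std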

def __init__(self, file_location, batch_size, train=True, shuffle=True, full=False, batch_from_disk=150):
    self.batch_size = batch_size
    self.train = train
    if train:
        self.ds = MyLMDBSerializer.load(file_location, shuffle=shuffle, batch_from_disk=batch_from_disk)
        self.ds = MyLocallyShuffleData(self.ds, buffer_size=10000, shuffle_interval=500)
        self.ds = MultiProcessRunnerZMQ(self.ds, num_proc=1, hwm=10000)
        self.len_ = 1281167  # ImageNet training-set size
    else:
        self.ds = LMDBSerializer.load(file_location, shuffle=False)
        self.ds = MultiProcessRunnerZMQ(self.ds, num_proc=1, hwm=10000)
        self.len_ = 50000  # ImageNet validation-set size
    self.ds.reset_state()
    self.batches_in_epoch = int(math.ceil(self.len_ / self.batch_size))
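
# Illustrative sketch only (assumption: the class this __init__ belongs to
# exposes self.ds and self.batches_in_epoch as set above; __len__/__iter__
# are not part of the original snippet):
# def __len__(self):
#     return self.batches_in_epoch
#
# def __iter__(self):
#     for dp in self.ds:  # self.ds yields datapoints prepared by the dataflow
#         yield dp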

def __init__(self, mode, batch_size=256, shuffle=False, num_workers=25, cache=50000, device='cuda'):
    # enumerate standard imagenet augmentors
    imagenet_augmentors = fbresnet_augmentor(mode == 'train')

    # load the lmdb if we can find it
    base_dir = '/userhome/cs/u3003679/'
    lmdb_loc = os.path.join(base_dir, 'ILSVRC-{}.lmdb'.format(mode))
    # lmdb_loc = os.path.join(os.environ['IMAGENET'], 'ILSVRC-%s.lmdb' % mode)
    ds = LMDBSerializer.load(lmdb_loc, shuffle=shuffle)
    ds = LocallyShuffleData(ds, cache)

    def f(dp):
        # decode the stored JPEG bytes and apply the augmentors in order
        x, label = dp
        x = cv2.imdecode(x, cv2.IMREAD_COLOR)
        for aug in imagenet_augmentors:
            x = aug.augment(x)
        return x, label

    # decode + augment in 8 worker processes, then batch
    ds = MultiProcessMapDataZMQ(ds, num_proc=8, map_func=f)
    self.ds = BatchData(ds, batch_size)
    self.ds.reset_state()

    self.batch_size = batch_size
    self.num_workers = num_workers
    self.device = device
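
# Usage sketch (assumptions: `loader` is an instance of the class this
# __init__ belongs to, and each batch from BatchData is an (images, labels)
# pair of numpy arrays; the torch conversion is illustrative, not from the
# original snippet).
# import torch
#
# def iterate_batches(loader):
#     for images, labels in loader.ds:
#         images = torch.from_numpy(images).to(loader.device)
#         labels = torch.from_numpy(labels).to(loader.device)
#         yield images, labels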

def getdata(path, batchsize, isTrain):
    ds = LMDBSerializer.load(path, shuffle=isTrain)
    ds = getbatch(ds, batchsize, isTrain)
    # if isTrain:
    #     ds = MultiProcessRunnerZMQ(ds, 2)
    return ds

def getdata(path, isTrain):
    ds = LMDBSerializer.load(path, shuffle=isTrain)
    ds = getbatch(ds, 64, isTrain)
    if isTrain:
        ds = MultiProcessRunnerZMQ(ds, 4)
    return ds