def copy_samples_lmdb(path_lmdb, path_dst, keys, func_data=None):
    """Copy selected samples from one lmdb into another.

    Can be used for sampling from an lmdb into another and for
    generating a random shuffle of lmdb content: the destination keys
    are consecutive indices starting at 0, in the order of `keys`.

    Parameters:
    path_lmdb -- source lmdb
    path_dst -- destination lmdb
    keys -- list of keys or indices to sample from source lmdb
    func_data -- optional callable applied to each raw value before writing
    """
    dst_env = lmdb.open(path_dst, map_size=MAP_SZ)
    with dst_env.begin(write=True) as txn_dst:
        with lmdb.open(path_lmdb, readonly=True).begin() as txn_src:
            for dst_idx, src_key in enumerate(keys):
                # Integer indices are formatted into string keys.
                if not isinstance(src_key, basestring):
                    src_key = IDX_FMT.format(src_key)
                value = txn_src.get(src_key)
                if func_data is not None:
                    value = func_data(value)
                txn_dst.put(IDX_FMT.format(dst_idx), value)
    dst_env.close()
def _infer_to_lmdb_cur_multi_key(net, keys, n, dbs):
    '''Run network inference for n batches and save results to an lmdb
    for each key.

    Higher time complexity but lower space complexity.
    See _infer_to_lmdb_cur_single_key() if there is only a single key.
    '''
    # One running write-index per key, advanced as samples are stored.
    counters = [0] * len(keys)
    for _ in range(n):
        batch_out = forward(net, keys)
        for pos, key in enumerate(keys):
            with dbs[key].begin(write=True) as txn:
                for sample in list(batch_out[key].astype(float)):
                    datum = caffe.io.array_to_datum(expand_dims(sample, 3))
                    txn.put(IDX_FMT.format(counters[pos]),
                            datum.SerializeToString())
                    counters[pos] += 1
    return counters
def matfiles_to_lmdb(paths_src, path_dst, fieldname, lut=None):
    '''Generate LMDB file from a set of mat files with integer data.

    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        for i, mat_path in enumerate(paths_src):
            arr = io.loadmat(mat_path)[fieldname]
            # get shape (1,H,W) and coerce to integer data
            arr = expand_dims(arr, 3).astype(int)
            if lut is not None:
                arr = lut(arr)
            datum = caffe.io.array_to_datum(arr)
            txn.put(IDX_FMT.format(i), datum.SerializeToString())
    env.close()
    return 0
def read_values_at(path_lmdb, key, dtype=None):
    """Read a single entry from an lmdb.

    Returns the decoded array and the datum's scalar label.

    adapted from Gustav Larsson
    http://deepdish.io/2015/04/28/creating-lmdb-in-python/
    """
    with lmdb.open(path_lmdb, readonly=True).begin() as txn:
        # Integer indices are formatted into string keys.
        if not isinstance(key, basestring):
            key = IDX_FMT.format(key)
        datum, arr = unpack_raw_datum(txn.get(key), dtype)
    return arr, datum.label  # scalar label
def arrays_to_lmdb(arrs, path_dst):
    '''Generate LMDB file from a list of ndarrays.'''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        for i, arr in enumerate(arrs):
            datum = caffe.io.array_to_datum(expand_dims(arr, 3))
            txn.put(IDX_FMT.format(i), datum.SerializeToString())
    env.close()
    return 0
def imgs_to_lmdb(paths_src, path_dst):
    '''Generate LMDB file from a set of images.

    Source: https://github.com/BVLC/caffe/issues/1698#issuecomment-70211045
    credit: Evan Shelhamer
    '''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        for i, img_path in enumerate(paths_src):
            datum = caffe.io.array_to_datum(read_img_cv2(img_path))
            txn.put(IDX_FMT.format(i), datum.SerializeToString())
    env.close()
    return 0
def _infer_to_lmdb_cur_single_key(net, key_, n, db):
    '''Run network inference for n batches and save results to an lmdb.

    Higher time complexity but lower space complexity.
    Takes advantage of there being only a single key: one transaction
    spans all batches.
    '''
    count = 0
    with db.begin(write=True) as txn:
        for _ in range(n):
            outputs = forward(net, [key_])
            for sample in list(outputs[key_].astype(float)):
                datum = caffe.io.array_to_datum(expand_dims(sample, 3))
                txn.put(IDX_FMT.format(count), datum.SerializeToString())
                count += 1
    # List form keeps the return shape consistent with the multi-key variant.
    return [count]
def concatenate_lmdb(paths_lmdb, path_dst):
    """Concatenate several lmdbs into a single destination lmdb.

    Entries are copied in the order the source lmdbs are listed and
    re-keyed with consecutive indices starting at 0, so duplicate keys
    across sources cannot collide.

    Parameters:
    paths_lmdb -- list of source lmdb paths to concatenate
    path_dst -- destination lmdb path
    """
    # NOTE(review): the previous docstring was copy-pasted from
    # copy_samples_lmdb and documented a nonexistent `keys` parameter.
    db = lmdb.open(path_dst, map_size=MAP_SZ)
    key_dst = 0
    with db.begin(write=True) as txn_dst:
        for p in paths_lmdb:
            with lmdb.open(p, readonly=True).begin() as txn_src:
                for _, value in txn_src.cursor():
                    txn_dst.put(IDX_FMT.format(key_dst), value)
                    key_dst += 1
    db.close()
def scalars_to_lmdb(scalars, path_dst, lut=None):
    '''Generate LMDB file from a list of scalars.'''
    env = lmdb.open(path_dst, map_size=MAP_SZ)
    with env.begin(write=True) as txn:
        # Promote a bare scalar to a one-element sequence.
        if not hasattr(scalars, '__iter__'):
            scalars = np.array([scalars])
        for idx, item in enumerate(scalars):
            if hasattr(item, '__iter__'):
                val = np.array(item)
            else:
                val = np.array([item])
            # validate these are scalars
            if val.size != 1:
                raise AttributeError(
                    "Unexpected shape for scalar at i=%d (%s)" %
                    (idx, str(val.shape)))
            # guarantee shape (1,1,1) with integer data
            val = expand_dims(val, 3).astype(int)
            if lut is not None:
                val = lut(val)
            datum = caffe.io.array_to_datum(val)
            txn.put(IDX_FMT.format(idx), datum.SerializeToString())
    env.close()
    return 0