def __init__(self, params, indexlist, phase, proc_id):
    super(BatchLoader, self).__init__()
    self.indexlist = indexlist
    self.proc_id = proc_id
    self.batch_size = params['batch_size']
    self.im_shape = params['im_shape']
    self.phase = phase
    self.queue = Queue(_QSIZE)
    # rec_conn, send_conn = Pipe()
    # self.rec_conn = rec_conn
    # self.send_conn = send_conn
    ## Divide the batch size across the jobs we have
    self.batch_ck_size = self.batch_size // _nJobs
    ## The last job picks up the remainder
    if self.proc_id == (_nJobs - 1):
        self.batch_ck_size += self.batch_size % _nJobs
    ## Open the LMDBs
    lmdb_output_pose_env = lmdb.Environment(params['source'] + '/pose_lmdb/',
                                            readonly=True, lock=False)
    self.cur_pose = lmdb_output_pose_env.begin().cursor()
    lmdb_output_flip_env = lmdb.Environment(params['source'] + '/flip_lmdb/',
                                            readonly=True, lock=False)
    self.cur_flip = lmdb_output_flip_env.begin().cursor()
    lmdb_output_land_env = lmdb.Environment(params['source'] + '/land_lmdb/',
                                            readonly=True, lock=False)
    self.cur_land = lmdb_output_land_env.begin().cursor()
    ################
    self.Nimgs = len(self.indexlist)
    # This class does some simple data manipulations
    # proto_data = open(params['mean_file'], "rb").read()
    # a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
    # mean = caffe.io.blobproto_to_array(a)[0]
    ## The mean is read as BGR in c,h,w order; we convert it to h,w,c.
    ## BGR is fine since both OpenCV and Caffe use BGR.
    ## MySimpleTransformer then subtracts the mean after the image has also
    ## been converted to BGR, so the comparison is apples-to-apples.
    self.transformer = MySimpleTransformer()
    self.aug_tr = aug_tracker.AugmentationTracker()
    if params['mean_file'] is not None:
        mean = np.load(params['mean_file'])
        mean = mean.transpose(1, 2, 0)
        mean = np.float32(mean)
        self.transformer.set_mean(mean)
    if self.phase == 1:
        util.myprint("BatchLoader_valid" + str(self.proc_id) +
                     " initialized with " + str(self.Nimgs) + " images")
    else:
        util.myprint("BatchLoader_train" + str(self.proc_id) +
                     " initialized with " + str(self.Nimgs) + " images")
    util.myprint("This will process: " + str(self.batch_ck_size) + '/' +
                 str(self.batch_size))
def make_database(db_name, files, map):
    im_db_name = db_name + '_im_db'
    label_db_name = db_name + '_label_db'
    if os.path.isdir(im_db_name):
        raise Exception(im_db_name + ' already exists. Delete it')
    if os.path.isdir(label_db_name):
        raise Exception(label_db_name + ' already exists. Delete it')

    # open the databases for writing
    im_db = lmdb.Environment(im_db_name, map_size=1000000000000)
    label_db = lmdb.Environment(label_db_name, map_size=1000000000000)

    # output image file size
    sz = (256, 256)

    with im_db.begin(write=True) as im_db_txn:
        with label_db.begin(write=True) as label_db_txn:
            for idx, file in enumerate(files):
                # get the label and make it N x 1 x 1
                label = np.asarray(map[file], dtype=np.float)
                label = label.reshape(label.shape + (1, 1))

                # load the image (RGB) and resize
                im = caffe.io.load_image(BASE_DIR + 'imageAlignedLD/' + file[6:])
                im = caffe.io.resize_image(im, sz)
                # channel swap for pre-trained models (RGB -> BGR)
                im = im[:, :, [2, 1, 0]]
                # make channels x height x width
                im = im.swapaxes(0, 2).swapaxes(1, 2)
                # convert to uint8
                im = (255 * im).astype(np.uint8, copy=False)

                # image to datum
                im_datum = caffe.io.array_to_datum(im)
                im_datum.ClearField('label')
                im_str = im_datum.SerializeToString()

                # label to datum
                label_datum = caffe.io.array_to_datum(label)
                label_datum.ClearField('label')
                label_str = label_datum.SerializeToString()

                # insert into the databases
                im_db_txn.put(file, im_str)
                label_db_txn.put(file, label_str)

                if idx % 500 == 0:
                    print "processed %d of %d (%s)" % (idx, len(files), db_name)
def main():
    readEnv = lmdb.Environment(input_fpath, readonly=True)
    writeEnv = lmdb.Environment(output_fpath, readonly=False,
                                map_size=1000000000000)  # 1 TB
    data = readPairs(fpath)
    with readEnv.begin() as readTx:
        for pair in data:
            f = readTx.get(pair[0])
            with writeEnv.begin(write=True) as writeTxn:
                writeTxn.put(pair[1], f)
            tic_toc_print('Done for %s' % pair)
def read_test():
    img_db_fn = 'data/image_train.lmdb'
    img_env = lmdb.Environment(img_db_fn, map_size=1099511627776)
    img_txn = img_env.begin(write=True, buffers=True)
    img_cur = img_txn.cursor()

    jnt_db_fn = 'data/joint_train.lmdb'
    jnt_env = lmdb.Environment(jnt_db_fn, map_size=1099511627776)
    jnt_txn = jnt_env.begin(write=True, buffers=True)
    jnt_cur = jnt_txn.cursor()

    # skip the first 10000 entries
    for _ in range(10000):
        img_cur.next()
        jnt_cur.next()

    img_datum = caffe.io.caffe_pb2.Datum()
    jnt_datum = caffe.io.caffe_pb2.Datum()

    if not os.path.exists('data/test'):
        os.makedirs('data/test')

    for i in range(100):
        img_key, img_value = img_cur.item()
        jnt_key, jnt_value = jnt_cur.item()
        if img_key != jnt_key:
            sys.exit('img_key and jnt_key should be the same')

        img_datum.ParseFromString(img_value)
        jnt_datum.ParseFromString(jnt_value)

        img_data = [struct.unpack('B', d) for d in img_datum.data]
        img_data = np.asarray(img_data, dtype=np.uint8)
        img_data = img_data.reshape(
            (img_datum.channels, img_datum.height, img_datum.width))
        img = np.array(img_data.swapaxes(0, 2).swapaxes(0, 1))
        cv.imwrite('data/test/%d.jpg' % i, img)

        img = cv.imread('data/test/%d.jpg' % i)
        jnt_data = np.asarray(jnt_datum.float_data).reshape((7, 2))
        for j in jnt_data:
            jt = (int(j[0] * img.shape[1]), int(j[1] * img.shape[0]))
            print jt
            cv.circle(img, jt, 5, (0, 0, 255), -1)
        cv.imwrite('data/test/%d.jpg' % i, img)

        img_cur.next()
        jnt_cur.next()
        print i

    img_env.close()
    jnt_env.close()
def storeSTRs(subfolder, key, val):
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/', max_dbs=0)
    with env.begin(write=True, buffers=True) as txn:
        txn.put(str(key).encode(), str(val).encode())
def test_lmdb(name='train_QP36_QP40'):
    # env_db = lmdb.Environment(name)
    dir = '/data/disk2/SHVC_SNR/data/rec_data/lmdb/train_QP36_QP39_part8'  # /train_QP36_QP40 dir
    env_db = lmdb.Environment(dir)
    # env_db = lmdb.open("./trainC")
    txn = env_db.begin()
    # txn.get() looks a value up by key; if the key has no entry, it returns None
    buf = txn.get(str('GT_001_088').encode('ascii'))
    print(type(buf))
    # value = np.frombuffer(buf, dtype=np.uint8)
    # value = value.reshape(720, 1280)
    # cv2.imwrite('./test.png', value)

    # k = 1
    # for key, value in txn.cursor():  # iterate over all entries
    #     print(key)
    #     value = np.frombuffer(value, dtype=np.uint8)
    #     value = value.reshape(720, 1280)
    #     # print(value.shape)
    #     # cv2.imshow(str(k), value)
    #     # cv2.waitKey()
    #     # cv2.imwrite('./test_' + str(k) + '.png', value)
    #     k += 1
    #     if k >= 10:
    #         break
    env_db.close()
def load_data_into_lmdb(path, features, labels=None):
    env = lmdb.Environment(path, map_size=features.nbytes * 2, subdir=True)
    # env = lmdb.open(lmdb_name, )
    features = features[:, :, None, None]
    with env.begin(write=True) as txn:
        for i in range(features.shape[0]):
            datum = caffe.proto.caffe_pb2.Datum()
            datum.channels = features.shape[1]
            datum.height = 1
            datum.width = 1
            if features.dtype == np.int:
                datum.data = features[i].tostring()
            elif features.dtype == np.float:
                datum.float_data.extend(features[i].flat)
            else:
                raise Exception("features.dtype unknown.")
            if labels is not None:
                datum.label = int(labels[i])
            str_id = '{:08}'.format(i)
            txn.put(str_id, datum.SerializeToString())
def open_env(version, name):
    path = os.path.join(version, name)
    if os.path.exists(os.path.join(path, 'data.mdb')):
        raise LMDBExistsError
    if not os.path.exists(path):
        os.makedirs(path)
    return lmdb.Environment(path=path, max_dbs=5, create=True)
def getSTRs(subfolder, key):
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/', max_dbs=0)
    with env.begin(write=False, buffers=True) as txn:
        return bytes(txn.get(key.encode())).decode('UTF-8')
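# A minimal usage sketch (not in the original source), assuming the hard-coded
# LMDB root above exists and that the 'prices' subfolder name is just an
# illustrative example: it round-trips a string value through storeSTRs/getSTRs.
# Note that each helper opens its own Environment on the same path, so calls
# should not overlap within one process.
def _string_roundtrip_example():
    storeSTRs('prices', 'BTC-EUR', '42000.5')         # stored as bytes under b'BTC-EUR'
    assert getSTRs('prices', 'BTC-EUR') == '42000.5'  # decoded back to str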
def write_lmdb(img_to_class_path, lmdb_path):
    img_to_class_fo = open(img_to_class_path, "r")
    lmdb_map_size = 1 << 40
    env = lmdb.Environment(lmdb_path, map_size=lmdb_map_size)
    with env.begin(write=True) as txn:  # txn is a Transaction object
        count = 0
        for line in img_to_class_fo.readlines():
            line = line.rstrip()
            img_path = line.split()[0]
            img_class = int(line.split()[1])

            img_data = imageio.imread(img_path).astype(np.float32)  # shape is (n, n)
            img_data = img_data.reshape(img_data.shape[0], img_data.shape[1], 1)  # shape is (n, n, 1)
            # convert from height-width-channel (HWC) to channel-height-width (CHW)
            img_data = np.transpose(img_data, (2, 0, 1))

            tensor_protos = caffe2_pb2.TensorProtos()
            img_tensor = tensor_protos.protos.add()
            img_tensor.dims.extend(img_data.shape)
            img_tensor.data_type = 1
            flatten_img = img_data.reshape(np.prod(img_data.shape))
            img_tensor.float_data.extend(flatten_img)

            img_class_tensor = tensor_protos.protos.add()
            img_class_tensor.data_type = 2
            img_class_tensor.int32_data.append(img_class)

            txn.put('{}'.format(count).encode('ascii'),
                    tensor_protos.SerializeToString())
            if count % 10 == 0:
                print("Inserted {} rows".format(count))
            count += 1
    print("Inserted {} rows".format(count))
    print("\nLMDB saved at " + lmdb_path + "\n\n")
    img_to_class_fo.close()
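# A hedged reader sketch for the database produced by write_lmdb above (not in
# the original source): it assumes the same record layout (protos[0] = float
# image data with dims, protos[1] = int32 label) and that lmdb_path exists.
import lmdb
import numpy as np
from caffe2.proto import caffe2_pb2

def read_lmdb(lmdb_path):
    env = lmdb.Environment(lmdb_path, readonly=True, lock=False)
    with env.begin() as txn:
        for key, value in txn.cursor():
            tensor_protos = caffe2_pb2.TensorProtos()
            tensor_protos.ParseFromString(value)
            img_proto, label_proto = tensor_protos.protos
            img = np.asarray(img_proto.float_data, dtype=np.float32).reshape(
                tuple(img_proto.dims))            # CHW float image
            label = label_proto.int32_data[0]     # class id
            print(key.decode('ascii'), img.shape, label)
    env.close()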
def test_update_targets():
    variant_ids = "/s/project/kipoi-cadd/data/raw/v1.3/training_data/sample_variant_ids.pkl"
    varids = load_variant_ids(variant_ids)
    lmdb_dir = "/s/project/kipoi-cadd/data/tests/lmdb_3/"
    num_vars = 0

    inputfile = \
        get_data_dir() + "/raw/v1.3/training_data/training_data.imputed.csv"
    row_example = pd.read_csv(inputfile, sep=',', nrows=1, skiprows=1, header=None)
    map_size = cadd_serialize_numpy_row(
        row_example.values[0], varids[0], np.float16, 0).to_buffer().size
    map_size = map_size * (varids.shape[0] + 1) * 5

    env = lmdb.Environment(lmdb_dir, lock=False, map_size=map_size, writemap=True)
    with env.begin(write=True, buffers=True) as txn:
        for var in varids:
            row = bytes(txn.get(var.encode('ascii')))
            np_row = pa.deserialize(row)
            if np_row['targets'] == -1:
                np_row['targets'] = 0
                ser_data = pa.serialize(np_row)
                buf = ser_data.to_buffer()
                txn.replace(var.encode('ascii'), buf)
                num_vars += 1
    print("Finished changing", num_vars, "rows.")
def write_images_to_db(self, db_name, images, callback=lambda a: 0):
    '''
    Writes images to an lmdb database inside this model's folder.

    db_name is the name of the database. If a database by that name already
    exists, it will be overwritten.

    images is a list of 2-element tuples. The first element of each tuple
    should be a path to an image, and the second element should be the label
    of that image.
    '''
    random.shuffle(images)  # Shuffle input data to improve training.
    p = os.path.join(self.get_folder(), db_name)
    s.call(['rm', '-r', p])
    map_size = 256 * 256 * 3 * 2 * len(images)
    env = lmdb.Environment(p, map_size=map_size)
    write_to = env.begin(write=True, buffers=True)
    i = 0
    num_images = len(images)
    update_interval = int(num_images / 100 + 1)
    for image in images:
        try:
            resize_image(image[0])
            # Caffe wants CxHxW, not the standard WxHxC.
            input = np.transpose(mp.imread('/tmp/resized.jpg'), (2, 1, 0))
            datum = array_to_datum(input, image[1])
            write_to.put('{:08}'.format(i).encode('ascii'),
                         datum.SerializeToString())
            i += 1
        except:
            pass
        if (i % update_interval == 0):
            callback([(i / num_images, '')])
    write_to.commit()
    env.close()
def __init__(self, filename, flag):
    """Constructor for the LMDBNoLockDatabase class.

    Args:
        filename (str): The filename of the database file.
        flag (str): a flag indicating the mode for opening the database.
            Refer to the documentation for anydbm.open().
    """
    super().__init__()

    create = bool(flag == 'c')

    if flag == 'n':
        if os.path.isfile(filename):
            os.remove(filename)
        create = True

    self._lmdb = lmdb.Environment(path=filename,
                                  map_size=1024**4,
                                  map_async=True,
                                  writemap=True,
                                  readahead=False,
                                  subdir=False,
                                  create=create,
                                  lock=True)
def write_images_to_lmdb(img_dir, db_name, labels):
    for root, dirs, files in os.walk(img_dir, topdown=False):
        if root != img_dir:
            continue
        # multiply by 2 to make the map_size large enough
        map_size = 2 * IMAGE_WIDTH * IMAGE_HEIGHT * 3 * len(files)
        env = lmdb.Environment(db_name, map_size=map_size)
        txn = env.begin(write=True, buffers=True)
        for idx, name in enumerate(files):
            # print(img_dir + name)
            img = cv2.imread(img_dir + name, cv2.IMREAD_COLOR)
            img = transform_img(img, img_width=IMAGE_WIDTH, img_height=IMAGE_HEIGHT)
            # print(img.shape)
            # print(img)
            img = img.transpose(2, 0, 1)
            y = labels[name]
            # print(name)
            # print(y)
            datum = array_to_datum(img, y)
            # print(datum)
            str_id = '{:08}'.format(idx)
            txn.put(str_id.encode('ascii'), datum.SerializeToString())
            if idx % 1000 == 1:
                print("transforming " + str(idx) + "th image to db")
        txn.commit()
        env.close()
    print(" ".join(["Writing to", db_name, "done!"]))
def __init__(self, storage_dir=None,
             shared_key=get_shared_key_from_environ(),
             client_password="******"):
    self._shared_key = shared_key
    self._client_password = client_password

    if not storage_dir:
        storage_dir = tempfile.mkdtemp()

    self.file_dir = os.path.join(storage_dir, 'storage')
    try:
        os.makedirs(self.file_dir)
    except:
        pass  # This is OK

    self.db_env = lmdb.Environment(
        os.path.join(storage_dir, 'db'),
        map_size=1024**3,  # 1GB - mostly metadata, so should be fine
        max_dbs=5)
    self.nonce_db = self.db_env.open_db(b'nonce')
    self.inbox_db = self.db_env.open_db(b'inbox', dupsort=True)
    self.metadata_db = self.db_env.open_db(b'metadata')
    self.increment_db = self.db_env.open_db(b'increment')
    self.tracking_db = self.db_env.open_db(b'tracking')

    self.timestamp_source = MonotonicTimestampSource()
def get_one_batch(lmdb_batch_dir, idx):
    env = lmdb.Environment(lmdb_batch_dir, readonly=True, lock=False)
    with env.begin() as txn:
        buff = bytes(txn.get(str(idx).encode('ascii')))
        ser = blosc.decompress(buff)
        batch = pa.deserialize(ser)
    return batch
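# A hedged writer counterpart to get_one_batch (not in the original source): it
# assumes batches are pyarrow-serializable objects and mirrors the read path,
# i.e. pa.serialize + blosc.compress, so pa.deserialize(blosc.decompress(...))
# above recovers the same object. The map_size default is an arbitrary
# illustrative value.
import lmdb
import blosc
import pyarrow as pa

def put_one_batch(lmdb_batch_dir, idx, batch, map_size=1 << 34):
    env = lmdb.Environment(lmdb_batch_dir, map_size=map_size)
    ser = pa.serialize(batch).to_buffer().to_pybytes()  # pyarrow serialization
    with env.begin(write=True) as txn:
        txn.put(str(idx).encode('ascii'), blosc.compress(ser))  # compressed bytes
    env.close()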
def createDB(name, contenido, funcion):
    db = lmdb.Environment(name, map_size=int(1e12))
    tx = db.begin(write=True)
    for label, imagen in enumerate(contenido):
        im = Image.open(imagen)
        punt = im.fp
        im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
        if 'cat' in os.path.split(imagen)[1]:
            y = 0
        else:
            y = 1
        x = np.array(im.getdata()).reshape(im.size[1], im.size[0], 3)
        datum = array_to_datum(np.transpose(x, (2, 0, 1)), y)
        if funcion(label, SCORE):
            print label
            tx.put('{:08}'.format(label), datum.SerializeToString())
        if not punt.closed:
            punt.close()
        if (label + 1) % 2500 == 0:
            tx.commit()
            tx = db.begin(write=True)
            print '------- commit -------'
    tx.commit()
    db.close()
def start(self, ctx=None):
    logger.debug("Starting data engine...")

    # register with the context
    if ctx:
        ctx.bind('dataengine', self)

    if not self.datapath:
        self.datapath = os.path.join(environ.data_dir(), 'stores')
    if not os.path.exists(self.datapath):
        os.mkdir(self.datapath)
    logger.debug("Data path: %s", self.datapath)

    try:
        self.database = lmdb.Environment(self.datapath,
                                         map_size=2000000000,
                                         max_dbs=1024)
        with self.database.begin(write=False) as txn:
            cur = txn.cursor()
            for k, v in iter(cur):
                logger.debug("Found existing store: %s", k)
                _db = self.database.open_db(k, create=False)
                self.stores[k] = Store(k, _db, self)
    except lmdb.Error:
        logger.exception("Failed to open database.", exc_info=True)
        raise

    logger.debug("Data engine started.")
def create_lmdb(db_name, labels):
    map_size = SEQUENCE_LENGTH * ALPHA_LENGTH * 27 * len(labels)  # 27x is just for safety
    env = lmdb.Environment(db_name, map_size=map_size)
    txn = env.begin(write=True, buffers=True)
    X_copy = np.ndarray((3, SEQUENCE_LENGTH, ALPHA_LENGTH))
    for i, label in enumerate(labels):
        print('writing img-{}'.format(i))
        X = mp.imread(IMG_BASE_NAME.format(i)) * 255
        X_rev = np.transpose(X)
        X_copy[0, :, :] = X_rev
        X_copy[1, :, :] = X_rev
        X_copy[2, :, :] = X_rev
        if label < 5:
            class_label = 0
        else:
            class_label = 1
        datum = array_to_datum(X_copy.astype(np.uint8), class_label)
        str_id = '{:08}'.format(i)
        txn.put(str_id.encode('ascii'), datum.SerializeToString())
    txn.commit()
    env.close()
    print('Done creating {}!'.format(db_name))
def main(json_path=None, lmdb_path=None):
    assert json_path is not None, 'json_path is needed'
    if lmdb_path is None:
        lmdb_path = json_path
    meta = os.path.join(json_path, 'meta.json')
    data_ids = []
    value = {}
    env = lmdb.Environment(lmdb_path, subdir=True, map_size=int(1e9),
                           max_dbs=2, lock=False)
    db_extra = env.open_db('extra'.encode(), create=True)
    db_image = env.open_db('image'.encode(), create=True)
    with open(meta, 'r') as meta_reader:
        for line in tqdm(meta_reader):
            single_meta = json.loads(line)
            data_id = os.path.join(json_path, single_meta['filename'])
            data_id = str(data_id.encode('utf-8').decode('utf-8'))
            with open(data_id.encode(), 'rb') as file_reader:
                image = file_reader.read()
            value['extra'] = {}
            for key in single_meta['extra']:
                value['extra'][key] = single_meta['extra'][key]
            with env.begin(write=True) as lmdb_writer:
                lmdb_writer.put(data_id.encode(), pickle.dumps(value), db=db_extra)
            with env.begin(write=True) as image_writer:
                image_writer.put(data_id.encode(), image, db=db_image)
    env.close()
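# A hedged read-back sketch for the two named sub-databases written by main()
# above (not part of the original source): it assumes the same 'extra'/'image'
# database names and that the values under db_extra were written with
# pickle.dumps, as in the writer.
import lmdb
import pickle

def read_entry(lmdb_path, data_id):
    env = lmdb.Environment(lmdb_path, subdir=True, max_dbs=2,
                           readonly=True, lock=False)
    db_extra = env.open_db('extra'.encode(), create=False)
    db_image = env.open_db('image'.encode(), create=False)
    with env.begin() as txn:
        extra = pickle.loads(txn.get(data_id.encode(), db=db_extra))  # metadata dict
        image = txn.get(data_id.encode(), db=db_image)                # raw image bytes
    env.close()
    return extra, image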
def __init__(self, data_dir, split, alphabet, line_height, transforms):
    logger.info("Loading IAM [%s] dataset..." % split)

    self.data_dir = data_dir
    self.split = split
    self.alphabet = alphabet
    self.preprocess = transforms

    # Read dataset description
    with open(os.path.join(data_dir, 'desc.json'), 'r') as fh:
        self.data_desc = json.load(fh)

    # Read LMDB image database
    self.lmdb_env = lmdb.Environment(os.path.join(data_dir, 'line-images.lmdb'),
                                     map_size=1e12,
                                     readonly=True)
    self.lmdb_txn = self.lmdb_env.begin(buffers=True)

    # Divide the dataset into classes by image width, for two purposes:
    # (1) It is more efficient to group images of roughly the same size in a
    #     minibatch, because that results in less padding, and thus less wasted
    #     computation, per minibatch.
    # (2) It is probably true (although we haven't shown this conclusively) that
    #     the system can learn faster by starting on smaller images and working
    #     its way up to longer images.
    #
    # We empirically measure that for IAM data, the breakdown in line widths is roughly:
    #   Width Range    Cumulative Percent of Data
    #   0-300          10%
    #   300-350        20%
    #   350-400        40%
    #   400-500        70%
    #   500-600        90%
    #   600+           100%
    self.size_group_limits = [400, 600, 999999999]
    self.size_group_keys = self.size_group_limits
    self.size_groups = dict()
    self.size_groups_dict = dict()

    for cur_limit in self.size_group_limits:
        self.size_groups[cur_limit] = []
        self.size_groups_dict[cur_limit] = dict()

    self.writer_id_map = dict()

    for idx, entry in enumerate(self.data_desc[self.split]):
        # First handle the writer id
        if not entry['writer'] in self.writer_id_map:
            self.writer_id_map[entry['writer']] = len(self.writer_id_map)

        # Now figure out which size group it belongs in
        for cur_limit in self.size_group_limits:
            if entry['width'] < cur_limit:
                self.size_groups[cur_limit].append(idx)
                self.size_groups_dict[cur_limit][idx] = 1
                break

    logger.info("Done.")
def __init__(self, filename, name):
    '''
    Create an lmdb-backed VStore using a cached environment
    '''
    if filename not in self._allenvs:
        self._allenvs[filename] = lmdb.Environment(filename,
                                                   map_size=100 << 30,
                                                   max_dbs=100)
    self._env = self._allenvs[filename]
    self._db = self._env.open_db(name)
def __init__(self, db_dir, transform=None, zfill_len=8, name_suffix='_INTEGRATED_'):
    self.db_dir = db_dir
    self.transform = transform
    self.zfill_len = zfill_len
    self.name_suffix = name_suffix
    self.env = lmdb.Environment(self.db_dir, readonly=True)
    self._len = self.env.stat()['entries']
def __init__(self, filename, serializer, deserializer,
             indexes=None, flag=None, _size=DEFAULT_SIZE):
    """Constructor for the IndexedDatabase class.

    Args:
        filename (str): The filename; in this case the database name -
            <host>:<port>/<db name>.
        serializer (function): converts entries to bytes
        deserializer (function): restores items from bytes
        indexes (dict:(str,function):optional): dict of index names to key
            functions. The key functions use the deserialized value and
            produce n index keys that will reference the item's primary key.
            Defaults to None.
        flag (str:optional): a flag indicating the mode for opening the
            database. Refer to the documentation for anydbm.open().
            Defaults to None.
    """
    super(IndexedDatabase, self).__init__()
    url = urlparse(filename)
    LOGGER.debug("ORIENTDB url=%s", url)

    create = bool(flag == 'c')

    client = pyorient.OrientDB(ORIENTDB_HOST, 2424)
    LOGGER.debug("TESTING ORIENTDB client=%s", client)
    session_id = client.connect(DB_USER, DB_PASS)
    LOGGER.debug("_ORIENTDB_ client=%s session_id=%s", client, session_id)
    # db = client.db_create(DB_NAME, pyorient.DB_TYPE_GRAPH, pyorient.STORAGE_TYPE_PLOCAL)
    is_db = client.db_exists(DB_NAME, pyorient.STORAGE_TYPE_PLOCAL)

    if flag == 'n':
        if os.path.isfile(filename):
            os.remove(filename)
        create = True

    if indexes is None:
        indexes = {}

    self._serializer = serializer
    self._deserializer = deserializer

    self._lmdb = lmdb.Environment(path=filename,
                                  map_size=_size,
                                  map_async=True,
                                  writemap=True,
                                  readahead=False,
                                  subdir=False,
                                  create=create,
                                  max_dbs=len(indexes) + 1,
                                  lock=True)

    self._main_db = self._lmdb.open_db('main'.encode())

    self._indexes = \
        {name: self._make_index_tuple(name, index_info)
         for name, index_info in indexes.items()}
def storeSTRINT(subfolder, key, val):
    if not type(key) is str or not type(val) is int:
        raise TypeError('wrong Types for storeSTRINT')
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/', max_dbs=0)
    with env.begin(write=True, buffers=True) as txn:
        txn.put(str(key).encode(), val.to_bytes(10, 'little'))
def reset_lmdb_database(self) -> None:
    """
    Needs to be called after each epoch when using an lmdb dataset: it closes
    the environment and opens a new one in order to kill active readers.
    """
    if self.save_buffer_on_disk:
        if self._is_lmdb_env_created():
            self._lmdb_env.close()
        self._lmdb_env = lmdb.Environment(self.buffer_db_dir,
                                          map_size=self.map_size,
                                          max_spare_txns=6)
def __init__(self, db_file):
    self._env = lmdb.Environment(db_file, map_size=1024**4, max_dbs=3)
    with self._env.begin(write=True) as txn:
        self._master_db = self._env.open_db(b'master', txn=txn)
        self._attr_index_db = self._env.open_db(b'attr_index', txn=txn, dupsort=True)
        self._attr_value_db = self._env.open_db(b'attr_value', txn=txn, dupsort=True)
def getExchMarkCurList(subfolder, key):
    if not type(key) is int:
        raise TypeError('wrong Types for getExchMarkCurList')
    env = lmdb.Environment(rootDatasDir + subfolder + '/', max_dbs=0)
    with env.begin(write=False, buffers=True) as txn:
        bina = txn.get(type_IntToByte(key))
        return type_ByteToFromJson(bina)
def create_dataset():
    img_db_fn = 'data/image_train.lmdb'
    del_and_create(img_db_fn)
    img_env = lmdb.Environment(img_db_fn, map_size=1099511627776)
    img_txn = img_env.begin(write=True, buffers=True)

    jnt_db_fn = 'data/joint_train.lmdb'
    del_and_create(jnt_db_fn)
    jnt_env = lmdb.Environment(jnt_db_fn, map_size=1099511627776)
    jnt_txn = jnt_env.begin(write=True, buffers=True)

    keys = np.arange(100000)
    np.random.shuffle(keys)

    '''
    img_fns = glob.glob('data/FLIC-full/crop/*.jpg')
    img_fns += glob.glob('data/lspet_dataset/crop/*.jpg')
    jnt_fns = glob.glob('data/FLIC-full/joint/*.npy')
    jnt_fns += glob.glob('data/lspet_dataset/joint/*.npy')
    '''
    img_fns = glob.glob('data/lspet_dataset/mvnCrop/*.jpg')
    jnt_fns = glob.glob('data/lspet_dataset/joint/*.npy')

    for i, (img_fn, jnt_fn) in enumerate(zip(sorted(img_fns), sorted(jnt_fns))):
        img_datum = get_img_datum(img_fn)
        jnt_datum = get_jnt_datum(jnt_fn)
        key = '%010d' % keys[i]

        img_txn.put(key, img_datum.SerializeToString())
        jnt_txn.put(key, jnt_datum.SerializeToString())

        if i % 10000 == 0:
            img_txn.commit()
            jnt_txn.commit()
            jnt_txn = jnt_env.begin(write=True, buffers=True)
            img_txn = img_env.begin(write=True, buffers=True)
        print i, os.path.basename(img_fn), os.path.basename(jnt_fn)

    img_txn.commit()
    jnt_txn.commit()
    img_env.close()
    jnt_env.close()
def __init__(self, app):
    assert app.config['data_dir']
    BaseService.__init__(self, app)
    db_directory = os.path.join(app.config['data_dir'], 'lmdb')
    self.env = lmdb.Environment(db_directory, map_size=TB)
    self.db_directory = db_directory
    self.uncommitted = dict()
    self.stop_event = Event()