def merge_levelDB(db_dir, new_folder, image_dims, isgray):
    """Merge every LevelDB found under ``db_dir`` into one new LevelDB.

    Each record is parsed as a Caffe ``Datum``, optionally resized via
    ``resize_datum``, and stored in the output database under the key
    ``'<8-digit running index>_<original key>'``.  Writes are flushed in
    batches of 1000 records.

    Args:
        db_dir: Directory whose entries are each opened as a LevelDB.
        new_folder: Path of the output database.  It must not exist yet
            (the database is opened with ``error_if_exists=True``).
        image_dims: Forwarded to ``resize_datum`` when not ``None``; when
            ``None`` the records are copied without resizing.
        isgray: Forwarded to ``resize_datum``.
    """
    batch_size = 1000
    db = plyvel.DB(new_folder, create_if_missing=True, error_if_exists=True,
                   write_buffer_size=268435456)
    try:
        wb = db.write_batch()
        count = 0
        # Sorted so the running index given to each record is reproducible;
        # os.listdir() returns entries in arbitrary order.
        for levelDB in sorted(os.listdir(db_dir)):
            print(levelDB)
            h = leveldb.LevelDB(os.path.join(db_dir, levelDB))
            datum = caffe_pb2.Datum()
            for key_val, ser_str in h.RangeIter():
                datum.ParseFromString(ser_str)
                if image_dims is not None:
                    datum = resize_datum(datum, image_dims, isgray)
                # Suffix the key with the record's original key (the old
                # code formatted the builtin ``file`` type here instead).
                wb.put('%08d_%s' % (count, key_val),
                       datum.SerializeToString())
                count += 1
                if count % batch_size == 0:
                    # Flush this batch and start a fresh one.
                    wb.write()
                    wb = db.write_batch()
                    print('Processed %i images.' % count)
        if count % batch_size != 0:
            # Flush the final, partially filled batch.
            wb.write()
        print('Processed a total of %i images.' % count)
    finally:
        # Release the output database even if a source DB fails to parse.
        db.close()
def load_all(self):
    """Load every sample and label from the LMDB database(s).

    Samples are read from the LMDB at ``self._source_fn``.  When
    ``self._label_fn`` is set, labels are read from that second LMDB and
    paired with the samples in cursor order; each label array is rendered
    as its integer values joined by ``":"``.  Otherwise the label is
    ``str(datum.label)`` of the sample itself.

    Returns:
        A ``(data, labels)`` tuple.  ``data`` is ``self._data`` holding
        the raw ``Datum.data`` payloads (still to be decoded, e.g. a raw
        JPEG string); ``labels`` is a numpy array of the label strings.

    Raises:
        Exception: If a database cannot be opened or parsed, or if the
            label database holds fewer records than the data database.
    """
    start = time.time()
    # NOTE(review): the message says "CSV File" although the source is
    # opened as LMDB; text left untouched in case logs are matched on it.
    print("Start Loading Data from CSV File {}".format(self._source_fn))
    db_ = None
    label_db_ = None
    try:
        db_ = lmdb.open(self._source_fn)
        data_cursor_ = db_.begin().cursor()
        label_cursor_ = None
        if self._label_fn:
            label_db_ = lmdb.open(self._label_fn)
            label_cursor_ = label_db_.begin().cursor()

        # Position both cursors on their first record and advance them in
        # lockstep.  The old code called first() on the label cursor and
        # then next() before reading, so every sample received the label
        # of the following record.
        has_data = data_cursor_.first()
        has_label = False
        if label_cursor_ is not None:
            has_label = label_cursor_.first()

        while has_data:
            datum_ = caffe_pb2.Datum()
            datum_.ParseFromString(data_cursor_.value())
            self._data.append(datum_.data)

            if label_cursor_ is not None:
                if not has_label:
                    raise ValueError(
                        "label database has fewer records than data database")
                label_datum_ = caffe_pb2.Datum()
                label_datum_.ParseFromString(label_cursor_.value())
                label_ = caffe.io.datum_to_array(label_datum_)
                label_ = ":".join([str(x) for x in label_.astype(int)])
                has_label = label_cursor_.next()
            else:
                label_ = str(datum_.label)
            self._labels.append(label_)

            has_data = data_cursor_.next()
    except Exception as err:
        # Same exception type as before, but keep the underlying reason
        # visible instead of discarding it.
        raise Exception("Error in Parsing input file: {}".format(err))
    finally:
        # Close whichever databases were opened, on success or failure.
        if label_db_ is not None:
            label_db_.close()
        if db_ is not None:
            db_.close()

    end = time.time()
    self._labels = np.array(self._labels)
    print("Loading {} samples Done: Time cost {} seconds".format(
        len(self._data), end - start))
    return self._data, self._labels
def next(self):
    """Return the next ``(image, label)`` pair, wrapping around at the end.

    Advances ``self._data_cursor``; when the cursor is exhausted, a new
    cursor is opened on ``self._db`` and reading restarts from the first
    record.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the result of
        ``self.decode_image_str`` applied to the datum payload
        (``datum.data``, or ``datum.float_data`` when ``data`` is empty)
        and ``label`` is ``datum.label``.

    Raises:
        StopIteration: If the database contains no records.
    """
    if not self._data_cursor.next():
        # End of the database: restart with a fresh cursor.  A new cursor
        # is unpositioned, so it has to be moved onto the first record
        # before value() is read; otherwise an empty record is parsed.
        self._data_cursor = self._db.begin().cursor()
        if not self._data_cursor.first():
            raise StopIteration("database is empty")

    datum = caffe_pb2.Datum()
    datum.ParseFromString(self._data_cursor.value())
    data = datum.data or datum.float_data
    image = self.decode_image_str(data)
    return image, datum.label
def convert_to_jpeg(db_dir, output_dir="data/mnist/jpg/"):
    """Dump every image stored in an LMDB of Caffe ``Datum`` records as JPEG.

    Each record's ``data`` bytes are interpreted as ``uint8`` and reshaped
    to ``(datum.height, datum.width)``, i.e. a single-channel image, then
    saved as ``<key>_<label>.jpg``.

    Args:
        db_dir: Path of the LMDB environment to read.
        output_dir: Directory that receives the JPEG files; created when
            missing.  Defaults to the previously hard-coded location.
    """
    import os

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    env = lmdb.open(db_dir)
    try:
        datum = caffe_pb2.Datum()
        with env.begin() as txn:
            for key_val, ser_str in txn.cursor():
                datum.ParseFromString(ser_str)
                # Two spaces reproduce the output of the former
                # ``print "...: ", value`` statements exactly.
                print("\nKey val:  %s" % (key_val,))
                print("\nLabel:  %s" % (datum.label,))

                # np.frombuffer replaces the deprecated binary mode of
                # np.fromstring.
                pixels = np.frombuffer(datum.data, dtype=np.uint8)
                img = pixels.reshape(datum.height, datum.width)

                file_name = str(key_val) + "_" + str(datum.label) + ".jpg"
                # NOTE(review): scipy.misc.toimage is gone from recent
                # SciPy releases; kept to avoid adding a new dependency.
                scipy.misc.toimage(img, cmin=0.0, cmax=255.0).save(
                    os.path.join(output_dir, file_name))
    finally:
        # Release the environment even when a record fails to convert.
        env.close()
def vis_db(db_dir, output_dir, image_dims, is_gray, img_num, save_flag):
    """Show or save the first ``img_num`` images stored in a LevelDB.

    Every record is parsed as a Caffe ``Datum`` whose ``data`` bytes are
    read as ``uint8`` in ``(channels, height, width)`` layout.

    Args:
        db_dir: Path of the LevelDB to read.
        output_dir: Root folder for saved images; one sub-folder per
            label.  An existing folder is removed first, but only when
            ``save_flag`` is true.
        image_dims: ``(height, width)`` to resize to, or ``None`` to keep
            the stored size.
        is_gray: When true only the first channel is used; otherwise the
            channel order is reversed before building the image.
        img_num: Maximum number of records to process.
        save_flag: Save JPEG files when true, otherwise call
            ``Image.show()`` for each image.
    """
    # Only wipe the output folder when it is actually going to be written.
    if save_flag and os.path.exists(output_dir):
        print('The folder already exists, would clean the folder')
        shutil.rmtree(output_dir)

    h = leveldb.LevelDB(db_dir)
    datum = caffe_pb2.Datum()
    for count, (_key, ser_str) in enumerate(h.RangeIter(), 1):
        # Stop before decoding anything beyond the requested amount.
        if count > img_num:
            break

        datum.ParseFromString(ser_str)
        rows = datum.height
        cols = datum.width
        channel = datum.channels
        label = datum.label
        print('%s %s %s %s' % (rows, cols, channel, label))

        # np.frombuffer replaces the deprecated binary mode of
        # np.fromstring.
        img = np.frombuffer(datum.data, dtype=np.uint8)
        img = img.reshape(channel, rows, cols).transpose((1, 2, 0))
        print(img.shape)

        if is_gray:
            img = img[:, :, 0]
        else:
            # Reverse the channel order; presumably the data is stored as
            # BGR and PIL expects RGB.
            img = img[:, :, (2, 1, 0)]
        im = Image.fromarray(img)

        if image_dims is not None:
            # PIL takes (width, height).
            im = im.resize((image_dims[1], image_dims[0]), Image.BILINEAR)

        if save_flag:
            real_out_dir = os.path.join(output_dir, str(label))
            mkdir_p(real_out_dir)
            im.save(os.path.join(real_out_dir, str(count) + '.jpg'),
                    quality=100)
        else:
            im.show()
import lmdb
from caffe.io import caffe_pb2, datum_to_array
import cv2 as cv

# Walk the MNIST training LMDB and display each image in an OpenCV window
# titled with its label; every image waits for a key press.
env = lmdb.open("mnist_train_lmdb")
try:
    txn = env.begin()
    cur = txn.cursor()
    for key, value in cur:
        print(type(key), key)
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)

        label = datum.label
        data = datum_to_array(datum)
        # Single-argument print(...) behaves the same as the former print
        # statements on Python 2 and is also valid on Python 3.
        print(data.shape)
        print(datum.channels)

        # Use the first channel as a 2-D image.  For multi-channel data,
        # data.transpose(1, 2, 0) would give a height x width x channels
        # array instead.
        image = data[0]
        print(type(label))

        cv.imshow(str(label), image)
        cv.waitKey(0)
finally:
    # Release the windows and the environment even if a record fails.
    cv.destroyAllWindows()
    env.close()