def __init__(self, params, indexlist, phase, proc_id):
        super(BatchLoader, self).__init__()
        self.indexlist = indexlist
        self.proc_id = proc_id
        self.batch_size = params['batch_size']
        self.im_shape = params['im_shape']
        self.phase = phase
        self.queue = Queue(_QSIZE)
        #rec_conn, send_conn = Pipe()
        # self.rec_conn = rec_conn
        # self.send_conn = send_conn
        ## Split the batch size evenly across the worker jobs
        self.batch_ck_size = self.batch_size // _nJobs
        ## the last job picks up the remainder
        if self.proc_id == (_nJobs - 1):
            self.batch_ck_size += self.batch_size % _nJobs
        ## Opening LMDB
        lmdb_output_pose_env = lmdb.Environment(params['source'] +
                                                '/pose_lmdb/',
                                                readonly=True,
                                                lock=False)
        self.cur_pose = lmdb_output_pose_env.begin().cursor()
        lmdb_output_flip_env = lmdb.Environment(params['source'] +
                                                '/flip_lmdb/',
                                                readonly=True,
                                                lock=False)
        self.cur_flip = lmdb_output_flip_env.begin().cursor()
        lmdb_output_land_env = lmdb.Environment(params['source'] +
                                                '/land_lmdb/',
                                                readonly=True,
                                                lock=False)
        self.cur_land = lmdb_output_land_env.begin().cursor()
        ################
        self.Nimgs = len(self.indexlist)
        # this class does some simple data-manipulations
        #proto_data = open(params['mean_file'], "rb").read()

        #a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
        #mean  = caffe.io.blobproto_to_array(a)[0]
        ## mean is read BGR and c,h,w; we convert it to h,w,c.
        ## BGR is fine since both OpenCV and Caffe use BGR.
        ## MySimpleTransformer then subtracts the mean after the image
        ## has also been converted to BGR, so the comparison is apples-to-apples.
        self.transformer = MySimpleTransformer()
        self.aug_tr = aug_tracker.AugmentationTracker()

        if params['mean_file'] is not None:
            mean = np.load(params['mean_file'])
            mean = mean.transpose(1, 2, 0)
            mean = np.float32(mean)
            self.transformer.set_mean(mean)

        if self.phase == 1:
            util.myprint("BatchLoader_valid" + str(self.proc_id) +
                         " initialized with " + str(self.Nimgs) + " images")
        else:
            util.myprint("BatchLoader_train" + str(self.proc_id) +
                         "  initialized with " + str(self.Nimgs) + " images")
            util.myprint("This will process: " + str(self.batch_ck_size) +
                         '/' + str(self.batch_size))
Example #2
def make_database(db_name, files, label_map):

  im_db_name = db_name + '_im_db'
  label_db_name = db_name + '_label_db'

  if os.path.isdir(im_db_name):
    raise Exception(im_db_name + ' already exists. Delete it')
  if os.path.isdir(label_db_name):
    raise Exception(label_db_name + ' already exists. Delete it')

  # open the database for writing
  im_db = lmdb.Environment(im_db_name, map_size=1000000000000)
  label_db = lmdb.Environment(label_db_name, map_size=1000000000000)

  # output image file size
  sz = (256, 256)

  with im_db.begin(write=True) as im_db_txn:
    with label_db.begin(write=True) as label_db_txn:
      for idx, file in enumerate(files):
        # get the label
        label = np.asarray(label_map[file], dtype=float)
        
        # make the label N x 1 x 1
        label = label.reshape(label.shape + (1,1))

        # load the image (RGB)
        im = caffe.io.load_image(BASE_DIR + 'imageAlignedLD/' + file[6:])
        im = caffe.io.resize_image(im, sz)

        # channel swap for pre-trained (RGB -> BGR)
        im = im[:, :, [2,1,0]]
        
        # make channels x height x width
        im = im.swapaxes(0,2).swapaxes(1,2)
      
        # convert to uint8
        im = (255*im).astype(np.uint8, copy=False) 
     
        # image to datum 
        im_datum = caffe.io.array_to_datum(im)
        im_datum.ClearField('label')
        im_str = im_datum.SerializeToString()

        # label to datum
        label_datum = caffe.io.array_to_datum(label)
        label_datum.ClearField('label')
        label_str = label_datum.SerializeToString()
      
        # insert into the database 
        im_db_txn.put(file.encode(), im_str)
        label_db_txn.put(file.encode(), label_str)
      
        if idx % 500 == 0:
          print "processed %d of %d (%s)" % (idx, len(files), db_name)
Example #3
def main():
    readEnv = lmdb.Environment(input_fpath, readonly=True)
    writeEnv = lmdb.Environment(output_fpath,
                                readonly=False,
                                map_size=1000000000000)  # 1 TB
    data = readPairs(fpath)
    with readEnv.begin() as readTx:
        for pair in data:
            f = readTx.get(pair[0])
            with writeEnv.begin(write=True) as writeTxn:
                writeTxn.put(pair[1], f)
            tic_toc_print('Done for %s' % pair)
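Opening a fresh write transaction per pair commits after every put; a hedged variant of the loop above that batches all copies into a single transaction (same readEnv, writeEnv and data as in main) would be:

    with readEnv.begin() as readTx, writeEnv.begin(write=True) as writeTxn:
        for pair in data:
            writeTxn.put(pair[1], readTx.get(pair[0]))
            tic_toc_print('Done for %s' % pair)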
Example #4
def read_test():
    img_db_fn = 'data/image_train.lmdb'
    img_env = lmdb.Environment(img_db_fn, map_size=1099511627776)
    img_txn = img_env.begin(write=True, buffers=True)
    img_cur = img_txn.cursor()

    jnt_db_fn = 'data/joint_train.lmdb'
    jnt_env = lmdb.Environment(jnt_db_fn, map_size=1099511627776)
    jnt_txn = jnt_env.begin(write=True, buffers=True)
    jnt_cur = jnt_txn.cursor()

    for _ in range(10000):
        img_cur.next()
        jnt_cur.next()

    img_datum = caffe.io.caffe_pb2.Datum()
    jnt_datum = caffe.io.caffe_pb2.Datum()

    if not os.path.exists('data/test'):
        os.makedirs('data/test')
    for i in range(100):
        img_key, img_value = img_cur.item()
        jnt_key, jnt_value = jnt_cur.item()
        if img_key != jnt_key:
            sys.exit('img_key and jnt_key should be same')

        img_datum.ParseFromString(img_value)
        jnt_datum.ParseFromString(jnt_value)

        img_data = np.frombuffer(img_datum.data, dtype=np.uint8)
        img_data = img_data.reshape(
            (img_datum.channels, img_datum.height, img_datum.width))
        img = np.array(img_data.swapaxes(0, 2).swapaxes(0, 1))

        cv.imwrite('data/test/%d.jpg' % i, img)
        img = cv.imread('data/test/%d.jpg' % i)
        jnt_data = np.asarray(jnt_datum.float_data).reshape((7, 2))
        for j in jnt_data:
            jt = (int(j[0] * img.shape[1]), int(j[1] * img.shape[0]))
            print(jt)
            cv.circle(img, jt, 5, (0, 0, 255), -1)
        cv.imwrite('data/test/%d.jpg' % i, img)

        img_cur.next()
        jnt_cur.next()

        print(i)

    img_env.close()
    jnt_env.close()
Example #5
def storeSTRs(subfolder, key, val):
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/',
        max_dbs=0)
    with env.begin(write=True, buffers=True) as txn:
        txn.put(str(key).encode(), str(val).encode())
Example #6
def test_lmdb(name='train_QP36_QP40'):
    # env_db = lmdb.Environment(name)
    db_dir = '/data/disk2/SHVC_SNR/data/rec_data/lmdb/train_QP36_QP39_part8'  # /train_QP36_QP40 dir
    env_db = lmdb.Environment(db_dir)
    # env_db = lmdb.open("./trainC")
    txn = env_db.begin()
    # get() returns the value stored under the key, or None if the key is absent
    buf = txn.get(str('GT_001_088').encode('ascii'))
    print(type(buf))
    # value = np.frombuffer(buf, dtype=np.uint8)
    # value = value.reshape(720, 1280)
    # cv2.imwrite('./test.png', value)

    # k = 1
    # for key, value in txn.cursor():  # iterate over all entries
    #         print (key)
    #         value = np.frombuffer(value, dtype=np.uint8)
    #         value = value.reshape(720, 1280)
    #         # print(value.shape)
    #         # cv2.imshow(str(k), value)
    #         # cv2.waitKey()
    #         # cv2.imwrite('./test_'+ str(k) + '.png', value)
    #         k+=1
    #         if k >= 10:
    #                 break

    env_db.close()
Example #7
def load_data_into_lmdb(path, features, labels=None):
    env = lmdb.Environment(path, map_size=features.nbytes * 2, subdir=True)
    #env = lmdb.open(lmdb_name, )

    features = features[:, :, None, None]
    with env.begin(write=True) as txn:
        for i in range(features.shape[0]):
            datum = caffe.proto.caffe_pb2.Datum()

            datum.channels = features.shape[1]
            datum.height = 1
            datum.width = 1

            if np.issubdtype(features.dtype, np.integer):
                datum.data = features[i].tobytes()
            elif np.issubdtype(features.dtype, np.floating):
                datum.float_data.extend(features[i].flat)
            else:
                raise Exception("features.dtype unknown.")

            if labels is not None:
                datum.label = int(labels[i])

            str_id = '{:08}'.format(i)
            txn.put(str_id.encode('ascii'), datum.SerializeToString())
Example #8
def open_env(version, name):
    path = os.path.join(version, name)
    if os.path.exists(os.path.join(path, 'data.mdb')):
        raise LMDBExistsError
    if not os.path.exists(path):
        os.makedirs(path)
    return lmdb.Environment(path=path, max_dbs=5, create=True)
Example #9
def getSTRs(subfolder, key):
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/',
        max_dbs=0)
    with env.begin(write=False, buffers=True) as txn:
        return bytes(txn.get(key.encode())).decode('UTF-8')
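Together with storeSTRs from Example #5, this gives a simple string round trip; a hypothetical usage (keys and values invented, and note each call opens its own environment on the same path):

storeSTRs('prices', 'BTC-2021-01-01', '29374.15')
assert getSTRs('prices', 'BTC-2021-01-01') == '29374.15'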
Example #10
def write_lmdb(img_to_class_path, lmdb_path):
    img_to_class_fo = open(img_to_class_path, "r")
    lmdb_map_size = 1 << 40
    env = lmdb.Environment(lmdb_path, map_size=lmdb_map_size)
    with env.begin(write=True) as txn:  # txn is a Transaction object
        count = 0
        for line in img_to_class_fo.readlines():
            line = line.rstrip()
            img_path = line.split()[0]
            img_class = int(line.split()[1])
            img_data = imageio.imread(img_path).astype(
                np.float32)  # shape is (n, n)
            img_data = img_data.reshape(img_data.shape[0],
                                        img_data.shape[1],
                                        1)  # shape is (n, n, 1)
            # convert from height-width-channel (HWC) to channel-height-width (CHW)
            img_data = np.transpose(img_data, (2, 0, 1))
            tensor_protos = caffe2_pb2.TensorProtos()
            img_tensor = tensor_protos.protos.add()
            img_tensor.dims.extend(img_data.shape)
            img_tensor.data_type = 1  # FLOAT
            flatten_img = img_data.reshape(np.prod(img_data.shape))
            img_tensor.float_data.extend(flatten_img)
            img_class_tensor = tensor_protos.protos.add()
            img_class_tensor.data_type = 2  # INT32
            img_class_tensor.int32_data.append(img_class)
            txn.put('{}'.format(count).encode('ascii'),
                    tensor_protos.SerializeToString())
            if count % 10 == 0:
                print("Inserted {} rows".format(count))
            count += 1
    print("Inserted {} rows".format(count))
    print("\nLMDB saved at " + lmdb_path + "\n\n")
    img_to_class_fo.close()
Example #11
def test_update_targets():
    variant_ids = "/s/project/kipoi-cadd/data/raw/v1.3/training_data/sample_variant_ids.pkl"
    varids = load_variant_ids(variant_ids)
    lmdb_dir = "/s/project/kipoi-cadd/data/tests/lmdb_3/"
    num_vars = 0
    inputfile = \
        get_data_dir() + "/raw/v1.3/training_data/training_data.imputed.csv"

    row_example = pd.read_csv(inputfile,
                              sep=',',
                              nrows=1,
                              skiprows=1,
                              header=None)
    map_size = cadd_serialize_numpy_row(row_example.values[0], varids[0],
                                        np.float16, 0).to_buffer().size
    map_size = map_size * (varids.shape[0] + 1) * 5

    env = lmdb.Environment(lmdb_dir,
                           lock=False,
                           map_size=map_size,
                           writemap=True)
    with env.begin(write=True, buffers=True) as txn:
        for var in varids:
            row = bytes(txn.get(var.encode('ascii')))
            np_row = pa.deserialize(row)
            if np_row['targets'] == -1:
                np_row['targets'] = 0
                ser_data = pa.serialize(np_row)
                buf = ser_data.to_buffer()
                txn.replace(var.encode('ascii'), buf)
                num_vars += 1
    print("Finished changing", num_vars, "rows.")
Example #12
 def write_images_to_db(self, db_name, images, callback=lambda a: 0):
     '''
     Writes images to an lmdb database inside this model's folder. db_name is the name of the
     database. If a database by that name already exists, it will be overwritten. images is a
     list of 2-element tuples. The first element of each tuple should be a path to an image, and
     the second element should be the label of that image.
     '''
     random.shuffle(images)  #Shuffle input data to improve training.
     p = os.path.join(self.get_folder(), db_name)
     s.call(['rm', '-r', p])
     map_size = 256 * 256 * 3 * 2 * len(images)
     env = lmdb.Environment(p, map_size=map_size)
     write_to = env.begin(write=True, buffers=True)
     i = 0
     num_images = len(images)
     update_interval = int(num_images / 100 + 1)
     for image in images:
         try:
             resize_image(image[0])
             input = np.transpose(
                 mp.imread('/tmp/resized.jpg'),
                 (2, 1, 0))  #Caffe wants CxHxW, not the standard WxHxC.
             datum = array_to_datum(input, image[1])
             write_to.put('{:08}'.format(i).encode('ascii'),
                          datum.SerializeToString())
             i += 1
         except:
             pass
         if (i % update_interval == 0):
             callback([(i / num_images, '')])
     write_to.commit()
     env.close()
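A hypothetical call matching the docstring's (path, label) tuple format, assuming model is an instance of the surrounding class:

images = [('/data/cats/001.jpg', 0), ('/data/dogs/001.jpg', 1)]
model.write_images_to_db('train_db', images)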
Example #13
    def __init__(self, filename, flag):
        """Constructor for the LMDBNoLockDatabase class.

        Args:
            filename (str): The filename of the database file.
            flag (str): a flag indicating the mode for opening the database.
                Refer to the documentation for anydbm.open().
        """
        super().__init__()

        create = bool(flag == 'c')

        if flag == 'n':
            if os.path.isfile(filename):
                os.remove(filename)
            create = True

        self._lmdb = lmdb.Environment(path=filename,
                                      map_size=1024**4,
                                      map_async=True,
                                      writemap=True,
                                      readahead=False,
                                      subdir=False,
                                      create=create,
                                      lock=True)
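A hedged usage sketch of the flag semantics described in the docstring ('c' creates the file if missing, 'n' always starts from an empty file); LMDBNoLockDatabase is assumed to be the surrounding class:

db = LMDBNoLockDatabase('/tmp/state.lmdb', 'c')       # create if absent
fresh = LMDBNoLockDatabase('/tmp/scratch.lmdb', 'n')  # remove any existing file first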
Example #14
def write_images_to_lmdb(img_dir, db_name, labels):
    for root, dirs, files in os.walk(img_dir, topdown = False):
        if root != img_dir:
            continue
        #multiply 2 to make the map_size large enough
        map_size = 2*IMAGE_WIDTH * IMAGE_HEIGHT *3*len(files)
        env = lmdb.Environment(db_name, map_size=map_size)
        txn = env.begin(write=True,buffers=True)

        for idx, name in enumerate(files):
            #print(img_dir + name)
            img = cv2.imread(os.path.join(img_dir, name), cv2.IMREAD_COLOR)
            img = transform_img(img, img_width=IMAGE_WIDTH, img_height=IMAGE_HEIGHT)
            #print(img.shape)
            #print(img)
            img=img.transpose(2,0,1)
            y = labels[name]
            #print(name)
            #print(y)
            datum = array_to_datum(img,y)
            #print(datum)
            #lala
            str_id = '{:08}'.format(idx)
            txn.put(str_id.encode('ascii'), datum.SerializeToString())   

            if idx % 1000 == 1:
                print("transforming" + str(idx) + "th image to sb")
    txn.commit()
    env.close()
    print " ".join(["Writing to", db_name, "done!"])
Example #15
    def __init__(self,
                 storage_dir=None,
                 shared_key=get_shared_key_from_environ(),
                 client_password="******"):
        self._shared_key = shared_key
        self._client_password = client_password
        if not storage_dir:
            storage_dir = tempfile.mkdtemp()
        self.file_dir = os.path.join(storage_dir, 'storage')
        try:
            os.makedirs(self.file_dir)
        except:
            pass  # This is OK

        self.db_env = lmdb.Environment(
            os.path.join(storage_dir, 'db'),
            map_size=1024**3,  # 1GB - mostly metadata, so should be fine
            max_dbs=5)
        self.nonce_db = self.db_env.open_db(b'nonce')
        self.inbox_db = self.db_env.open_db(b'inbox', dupsort=True)
        self.metadata_db = self.db_env.open_db(b'metadata')
        self.increment_db = self.db_env.open_db(b'increment')
        self.tracking_db = self.db_env.open_db(b'tracking')

        self.timestamp_source = MonotonicTimestampSource()
Example #16
def get_one_batch(lmdb_batch_dir, idx):
    env = lmdb.Environment(lmdb_batch_dir, readonly=True, lock=False)
    with env.begin() as txn:
        buff = bytes(txn.get(str(idx).encode('ascii')))
        ser = blosc.decompress(buff)
        batch = pa.deserialize(ser)
    return batch
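The read path above expects values written as blosc-compressed pyarrow payloads; a minimal matching writer sketch under that assumption (put_one_batch is hypothetical, and pa.serialize exists only in older pyarrow releases):

def put_one_batch(lmdb_batch_dir, idx, batch):
    env = lmdb.Environment(lmdb_batch_dir, map_size=1 << 34)
    with env.begin(write=True) as txn:
        ser = pa.serialize(batch).to_buffer().to_pybytes()
        txn.put(str(idx).encode('ascii'), blosc.compress(ser))
    env.close()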
Example #17
def createDB(name, contenido, funcion):
    db = lmdb.Environment(name, map_size=int(1e12))
    tx = db.begin(write=True)

    for label, imagen in enumerate(contenido):
        im = Image.open(imagen)
        punt = im.fp
        im = im.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
        if 'cat' in os.path.split(imagen)[1]:
            y = 0
        else:
            y = 1
        x = np.array(im.getdata()).reshape(im.size[1], im.size[0], 3)
        datum = array_to_datum(np.transpose(x, (2, 0, 1)), y)

        if funcion(label, SCORE):
            print(label)
            tx.put('{:08}'.format(label).encode('ascii'), datum.SerializeToString())

        if not punt.closed:
            punt.close()

        if (label + 1) % 2500 == 0:
            tx.commit()
            tx = db.begin(write=True)
            print('------- commit -------')

    tx.commit()
    db.close()
Example #18
    def start(self, ctx=None):
        logger.debug("Starting data engine...")

        # register with the context
        if ctx:
            ctx.bind('dataengine', self)
        if not self.datapath:
            self.datapath = os.path.join(environ.data_dir(), 'stores')
            if not os.path.exists(self.datapath):
                os.mkdir(self.datapath)
        logger.debug("Data path: %s", self.datapath)

        try:
            self.database = lmdb.Environment(self.datapath,
                                             map_size=2000000000,
                                             max_dbs=1024)
            with self.database.begin(write=False) as txn:
                cur = txn.cursor()
                for k, v in iter(cur):
                    logger.debug("Found existing store: %s", k)
                    _db = self.database.open_db(k, create=False)
                    self.stores[k] = Store(k, _db, self)
        except lmdb.Error:
            logger.exception("Failed to open database.", exc_info=True)
            raise

        logger.debug("Data engine started.")
Example #19
def create_lmdb(db_name, labels):
    map_size = SEQUENCE_LENGTH * ALPHA_LENGTH * 27 * len(
        labels)  # 27x is just for safety
    env = lmdb.Environment(db_name, map_size=map_size)
    txn = env.begin(write=True, buffers=True)

    X_copy = np.ndarray((3, SEQUENCE_LENGTH, ALPHA_LENGTH))

    for i, label in enumerate(labels):
        print('writing img-{}'.format(i))
        X = mp.imread(IMG_BASE_NAME.format(i)) * 255
        X_rev = np.transpose(X)
        X_copy[0, :, :] = X_rev
        X_copy[1, :, :] = X_rev
        X_copy[2, :, :] = X_rev

        if label < 5:
            class_label = 0
        else:
            class_label = 1

        datum = array_to_datum(X_copy.astype(np.uint8), class_label)
        str_id = '{:08}'.format(i)
        txn.put(str_id.encode('ascii'), datum.SerializeToString())

    txn.commit()
    env.close()
    print('Done creating {}!'.format(db_name))
Example #20
def main(json_path=None, lmdb_path=None):
    assert json_path is not None, 'json_path is needed'
    if lmdb_path is None:
        lmdb_path = json_path

    meta = os.path.join(json_path, 'meta.json')
    data_ids = []
    value = {}
    env = lmdb.Environment(lmdb_path,
                           subdir=True,
                           map_size=int(1e9),
                           max_dbs=2,
                           lock=False)
    db_extra = env.open_db('extra'.encode(), create=True)
    db_image = env.open_db('image'.encode(), create=True)
    with open(meta, 'r') as meta_reader:
        for line in tqdm(meta_reader):
            single_meta = json.loads(line)
            data_id = os.path.join(json_path, single_meta['filename'])
            data_id = str(data_id.encode('utf-8').decode('utf-8'))
            with open(data_id.encode(), 'rb') as file_reader:
                image = file_reader.read()
            value['extra'] = {}
            for key in single_meta['extra']:
                value['extra'][key] = single_meta['extra'][key]
            with env.begin(write=True) as lmdb_writer:
                lmdb_writer.put(data_id.encode(),
                                pickle.dumps(value),
                                db=db_extra)
            with env.begin(write=True) as image_writer:
                image_writer.put(data_id.encode(), image, db=db_image)
    env.close()
Example #21
    def __init__(self, data_dir, split, alphabet, line_height, transforms):
        logger.info("Loading IAM [%s] dataset..." % split)

        self.data_dir = data_dir
        self.split = split
        self.alphabet = alphabet

        self.preprocess = transforms

        # Read Dataset Description
        with open(os.path.join(data_dir, 'desc.json'), 'r') as fh:
            self.data_desc = json.load(fh)

        # Read LMDB image database
        self.lmdb_env = lmdb.Environment(os.path.join(data_dir,
                                                      'line-images.lmdb'),
                                         map_size=int(1e12),
                                         readonly=True)
        self.lmdb_txn = self.lmdb_env.begin(buffers=True)

        # Divide dataset into classes by width of images, for two purposes:
        #    (1) It is more efficient to group images of roughly the same size in a minibatch,
        #        because that results in less padding, and thus less wasted computation, per minibatch
        #    (2) It is probably true (although we haven't shown this conclusively) that the system can
        #        learn faster by starting off on smaller images and working its way up to longer images
        #
        # We empirically measure that for IAM data, the breakdown in line-widths is roughly:
        #    Width Range      Cumulative Percent of Data
        #       0-300              10%
        #     300-350              20%
        #     350-400              40%
        #     400-500              70%
        #     500-600              90%
        #     600+                 100%

        self.size_group_limits = [400, 600, 999999999]
        self.size_group_keys = self.size_group_limits
        self.size_groups = dict()
        self.size_groups_dict = dict()

        for cur_limit in self.size_group_limits:
            self.size_groups[cur_limit] = []
            self.size_groups_dict[cur_limit] = dict()

        self.writer_id_map = dict()

        for idx, entry in enumerate(self.data_desc[self.split]):
            # First handle writer id
            if not entry['writer'] in self.writer_id_map:
                self.writer_id_map[entry['writer']] = len(self.writer_id_map)

            # Now figure out which size-group it belongs in
            for cur_limit in self.size_group_limits:
                if entry['width'] < cur_limit:
                    self.size_groups[cur_limit].append(idx)
                    self.size_groups_dict[cur_limit][idx] = 1
                    break

        logger.info("Done.")
Example #22
	def __init__(self, filename, name):
		''' Create a lmdb-backed VStore using a cached environment '''
		if filename not in self._allenvs:
			self._allenvs[filename] = lmdb.Environment(filename,
				map_size=100<<30,
				max_dbs=100)
		self._env = self._allenvs[filename]
		self._db = self._env.open_db(name)
Example #23
 def __init__(self,db_dir,transform=None,zfill_len=8,name_suffix='_INTEGRATED_'):
     self.db_dir = db_dir
     self.transform = transform
     self.zfill_len = zfill_len
     self.name_suffix = name_suffix
     
     self.env = lmdb.Environment(self.db_dir,readonly=True) 
     self._len = self.env.stat()['entries']
Example #24
    def __init__(self,
                 filename,
                 serializer,
                 deserializer,
                 indexes=None,
                 flag=None,
                 _size=DEFAULT_SIZE):
        """Constructor for the IndexedDatabase class.

        Args:
            filename (str): The filename; in this case the database name, <host>:<port>/<db name>.
            serializer (function): converts entries to bytes
            deserializer (function): restores items from bytes
            indexes (dict:(str,function):optional): dict of index names to key
                functions.  The key functions use the deserialized value and
                produce index keys that will reference the item's primary
                key. Defaults to None.
            flag (str:optional): a flag indicating the mode for opening the
                database.  Refer to the documentation for anydbm.open().
                Defaults to None.
        """
        super(IndexedDatabase, self).__init__()
        url = urlparse(filename)
        LOGGER.debug("ORIENTDB url=%s", url)
        create = bool(flag == 'c')
        client = pyorient.OrientDB(ORIENTDB_HOST, 2424)
        LOGGER.debug("TESTING ORIENTDB client=%s", client)
        session_id = client.connect(DB_USER, DB_PASS)
        LOGGER.debug("_ORIENTDB_ client=%s session_id=%s", client, session_id)
        #db = client.db_create( DB_NAME, pyorient.DB_TYPE_GRAPH, pyorient.STORAGE_TYPE_PLOCAL )
        is_db = client.db_exists(DB_NAME, pyorient.STORAGE_TYPE_PLOCAL)

        if flag == 'n':
            if os.path.isfile(filename):
                os.remove(filename)
            create = True

        if indexes is None:
            indexes = {}

        self._serializer = serializer
        self._deserializer = deserializer

        self._lmdb = lmdb.Environment(path=filename,
                                      map_size=_size,
                                      map_async=True,
                                      writemap=True,
                                      readahead=False,
                                      subdir=False,
                                      create=create,
                                      max_dbs=len(indexes) + 1,
                                      lock=True)

        self._main_db = self._lmdb.open_db('main'.encode())

        self._indexes = \
            {name: self._make_index_tuple(name, index_info)
             for name, index_info in indexes.items()}
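A hedged sketch of the indexes argument described in the docstring; the 'name' index, the JSON serializers, and the item shape are all invented for illustration:

import json

def name_index_keys(item):
    # derive one index key from the deserialized value
    return [item['name'].encode()]

db = IndexedDatabase('localhost:2424/mydb',
                     serializer=lambda v: json.dumps(v).encode(),
                     deserializer=lambda b: json.loads(b.decode()),
                     indexes={'name': name_index_keys},
                     flag='c')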
Example #25
def storeSTRINT(subfolder, key, val):
    if not isinstance(key, str) or not isinstance(val, int):
        raise TypeError('wrong types for storeSTRINT')
    env = lmdb.Environment(
        '/home/alex/workspace-noneclipse/crycsv/' + subfolder + '/',
        max_dbs=0)
    with env.begin(write=True, buffers=True) as txn:
        txn.put(str(key).encode(), val.to_bytes(10, 'little'))
Example #26
 def reset_lmdb_database(self) -> None:
     """
     A function that needs to be called after each epoch, in case of using an lmdb dataset, to close the environment
         and open a new one to kill active readers
     """
     if self.save_buffer_on_disk:
         if self._is_lmdb_env_created():
             self._lmdb_env.close()
         self._lmdb_env = lmdb.Environment(self.buffer_db_dir, map_size=self.map_size, max_spare_txns=6)
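A hypothetical epoch loop around the method above (num_epochs, train_one_epoch and buffer are placeholders, not from the original code):

for epoch in range(num_epochs):
    train_one_epoch(buffer)
    buffer.reset_lmdb_database()  # recycle the lmdb env between epochs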
Example #27
 def __init__(self, db_file):
     self._env = lmdb.Environment(db_file, map_size=1024**4, max_dbs=3)
     with self._env.begin(write=True) as txn:
         self._master_db = self._env.open_db(b'master', txn=txn)
         self._attr_index_db = self._env.open_db(b'attr_index',
                                                 txn=txn,
                                                 dupsort=True)
         self._attr_value_db = self._env.open_db(b'attr_value',
                                                 txn=txn,
                                                 dupsort=True)
Example #28
def getExchMarkCurList(subfolder, key):
    if not isinstance(key, int):
        raise TypeError('wrong types for getExchMarkCurList')
    env = lmdb.Environment(rootDatasDir + subfolder + '/', max_dbs=0)
    with env.begin(write=False, buffers=True) as txn:
        bina = txn.get(type_IntToByte(key))
        return type_ByteToFromJson(bina)
Example #29
def create_dataset():
    img_db_fn = 'data/image_train.lmdb'
    del_and_create(img_db_fn)
    img_env = lmdb.Environment(img_db_fn, map_size=1099511627776)
    img_txn = img_env.begin(write=True, buffers=True)

    jnt_db_fn = 'data/joint_train.lmdb'
    del_and_create(jnt_db_fn)
    jnt_env = lmdb.Environment(jnt_db_fn, map_size=1099511627776)
    jnt_txn = jnt_env.begin(write=True, buffers=True)

    keys = np.arange(100000)
    np.random.shuffle(keys)
    '''
    img_fns = glob.glob('data/FLIC-full/crop/*.jpg')
    img_fns += glob.glob('data/lspet_dataset/crop/*.jpg')
    jnt_fns = glob.glob('data/FLIC-full/joint/*.npy')
    jnt_fns += glob.glob('data/lspet_dataset/joint/*.npy')
	'''
    img_fns = glob.glob('data/lspet_dataset/mvnCrop/*.jpg')
    jnt_fns = glob.glob('data/lspet_dataset/joint/*.npy')
    for i, (img_fn, jnt_fn) in enumerate(zip(sorted(img_fns),
                                             sorted(jnt_fns))):
        img_datum = get_img_datum(img_fn)
        jnt_datum = get_jnt_datum(jnt_fn)
        key = ('%010d' % keys[i]).encode('ascii')

        img_txn.put(key, img_datum.SerializeToString())
        jnt_txn.put(key, jnt_datum.SerializeToString())

        if i % 10000 == 0:
            img_txn.commit()
            jnt_txn.commit()
            jnt_txn = jnt_env.begin(write=True, buffers=True)
            img_txn = img_env.begin(write=True, buffers=True)

        print(i, os.path.basename(img_fn), os.path.basename(jnt_fn))

    img_txn.commit()
    jnt_txn.commit()
    img_env.close()
    jnt_env.close()
Example #30
    def __init__(self, app):
        assert app.config['data_dir']

        BaseService.__init__(self, app)

        db_directory = os.path.join(app.config['data_dir'], 'lmdb')

        self.env = lmdb.Environment(db_directory, map_size=TB)
        self.db_directory = db_directory
        self.uncommitted = dict()
        self.stop_event = Event()