def data_loader(dataset_file):
    db = leveldb.LevelDB(dataset_file)
    datum = caffe_pb2.Datum()

    img1s = []
    img2s = []
    labels = []

    for key, value in db.RangeIter():
        datum.ParseFromString(value)

        label = datum.label
        data = caffe.io.datum_to_array(datum)

        # split data from 6-channel image into 2 3-channel images
        img1 = data[:3, :, :]
        img2 = data[3:, :, :]

        labels.append(label)
        img1s.append(img1)
        img2s.append(img2)

    return labels, img1s, img2s
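A minimal usage sketch (the dataset path and the stacking step are assumptions, not part of the original snippet):

import numpy as np

# Hypothetical path to a LevelDB of 6-channel pair images.
labels, img1s, img2s = data_loader('siamese_pairs_leveldb')
labels = np.array(labels)
img1s = np.stack(img1s)  # (N, 3, H, W)
img2s = np.stack(img2s)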
Example #2
def write_lmdb(db_path, list_filename, height, width, count_start=0, encode=False):
  map_size = 2 * 50000 * 256 * 256 * 3
  db = lmdb.open(db_path, map_size=map_size)
  writer = db.begin(write=True)
  datum = caffe_pb2.Datum()
  for count, line in enumerate(open(list_filename, 'r')):
    img_filename, label = line.strip().split(' ')
    datum.label = int(label)
    datum.channels = 3
    datum.height = height
    datum.width = width
    img = cv2.resize(cv2.imread(img_filename, 1), (width, height))  # cv2.resize takes (width, height); img was previously undefined in the else branch
    if encode:
      datum.encoded = True
      _, img_jpg = cv2.imencode('.jpg', img)
      datum.data = img_jpg.tostring()
    else:
      datum.data = np.rollaxis(img, 2, 0).tostring()
    key = '%010d' % (count + count_start)
    writer.put(key, datum.SerializeToString(), append=True)
    print (key, label)
  writer.commit()
  db.close()
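A hedged invocation sketch (the file names are hypothetical; the list file is assumed to hold "<image_path> <label>" per line). Note that under Python 3, py-lmdb requires byte keys, so the '%010d' key would need an .encode('ascii'):

write_lmdb('train_lmdb', 'train_list.txt', height=256, width=256, encode=True)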
def load_lmdb_label(lmdb_path, label_cnt):
    """load the label vector

       Returns:
            label_vector
    """
    print 'loading lmdb label vector ...'
    feature_lmdb_env = lmdb.open(lmdb_path)
    lmdb_txn = feature_lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()
    label_vector = np.zeros((label_cnt), dtype=np.int32)
    for ix, (key, value) in enumerate(lmdb_cursor):
        if ix == label_cnt:
            break
        datum.ParseFromString(value)
        label_vector[ix] = datum.label
        data = caffe.io.datum_to_array(datum)
        data = np.squeeze(data)[:]
        if (ix + 1) % 2000 == 0:
            print 'label vector process %d' % (ix + 1)
    print 'finished loading lmdb label ...'
    return label_vector
def load_lmdb(lmdb_path, feature_cnt, feature_dim, pre_mode):
    """change the txt file to dump store

       Args:
            pre_mode : [0 no process] [1 : L2-normalize] [2 : binarize]
       Returns:
            data_feature_vector
    """
    print 'loading lmdb ...'
    feature_lmdb_env = lmdb.open(lmdb_path)
    lmdb_txn = feature_lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()
    num = feature_cnt
    data_feature_vector = np.zeros((num, feature_dim), dtype=np.float64)
    for ix, (key, value) in enumerate(lmdb_cursor):
        if ix == feature_cnt:
            break
        # datum.ParseFromString(key)
        # data = caffe.io.datum_to_array(datum)
        # image_name = data[0]
        datum.ParseFromString(value)
        data = caffe.io.datum_to_array(datum)
        data = np.squeeze(data)[:]
        data_feature_vector[ix, :] = data
        if pre_mode > 0:
            if pre_mode == 1:
                data_feature_vector[ix, :] = normalize_1d(data)
            elif pre_mode == 3:
                cache = sign_power(data)
                data_feature_vector[ix, :] = cache
            else:
                data_feature_vector[ix, :] = binarize(data, feature_dim)
        if (ix + 1) % 2000 == 0:
            print 'feature process %d' % (ix + 1)
    print 'finished loading lmdb ...'
    return data_feature_vector
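normalize_1d, binarize, and sign_power are not defined in this snippet; minimal sketches consistent with the docstring (their exact behavior is an assumption):

import numpy as np

def normalize_1d(vec):
    # L2-normalize a 1-D feature vector, guarding against a zero norm.
    norm = np.linalg.norm(vec)
    return vec / norm if norm > 0 else vec

def binarize(vec, dim):
    # Threshold each of the first dim components at zero to get a 0/1 code.
    return (vec[:dim] > 0).astype(np.float64)

def sign_power(vec, alpha=0.5):
    # Signed power normalization: sign(x) * |x|^alpha.
    return np.sign(vec) * np.power(np.abs(vec), alpha)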
def convertData2Lmdb(datapath):
    datadict = sio.loadmat(datapath)
    datadict.pop('__version__')
    datadict.pop('__header__')
    datadict.pop('__globals__')

    allplans = getPlan()
    for index, currplan in enumerate(allplans):
        map_size = currplan[0][0] * 20659 * 8
        env = lmdb.open('gene_' + str(index) + '_lmdb', map_size=map_size)
        txn = env.begin(write=True)
        count = 0
        for dictkeys in datadict:
            keywords = dictkeys.split('_')
            if keywords[1] not in currplan: continue
            if keywords[2] not in currplan[keywords[1]]: continue
            if currplan[keywords[1]][keywords[2]] == 0: continue
            currplan[keywords[1]][keywords[2]] -= 1
            data = [[[]]]
            label = 0 if keywords[2] == 'F' else 1
            for dictvals in datadict[dictkeys]:
                for dataelem in dictvals:
                    #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
                    data[0][0].append(float(dataelem))
            dataarr = np.array(data)
            datum = caffe_pb2.Datum()
            datum = caffe.io.array_to_datum(dataarr, label)
            str_id = '{:08}'.format(count)
            txn.put(str_id, datum.SerializeToString())
            count += 1
            if count % 1000 == 0 or count == currplan[0][0]:
                print('train: already handled with {} samples'.format(count))
                txn.commit()
                txn = env.begin(write=True)
            if count == currplan[0][0]: break
        txn.commit()
        env.close()
Example #6
def write_lmdb(db_path, dataset_name, count_start=0, encode=False):
    dataset = unpickle(dataset_name)
    map_size = (count_start + len(dataset['labels'])) * 2 * (3 * 32 * 32 + 128)
    db = lmdb.open(db_path, map_size=map_size)
    writer = db.begin(write=True)
    datum = caffe_pb2.Datum()
    for count, (img, label) in enumerate(zip(dataset['data'], dataset['labels'])):
        datum.label = label
        datum.channels = 3
        datum.height = 32
        datum.width = 32
        if encode:
            datum.encoded = True
            img_rgb = np.rollaxis(img.reshape(3, 32, 32), 0, 3)
            _, img_jpg = cv2.imencode('.jpg', img_rgb)  # note: cv2 assumes BGR input, while CIFAR data is RGB, so channels end up swapped in the JPEG
            datum.data = img_jpg.tostring()
        else:
            datum.data = img.tostring()
        key = '%010d' % (count + count_start)
        writer.put(key, datum.SerializeToString(), append=True)
        print(key, label)
    writer.commit()
    db.close()
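unpickle is not shown above; a sketch matching the CIFAR-10 batch format this writer expects (the latin1 encoding is needed on Python 3):

import pickle

def unpickle(filename):
    # Load a CIFAR-10 style batch: a dict with 'data' (N x 3072 uint8 rows)
    # and 'labels' (a list of N ints).
    with open(filename, 'rb') as f:
        return pickle.load(f, encoding='latin1')  # drop encoding= on Python 2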
def Read_lmdb(lmdb_path, opt, mode='RGB'):  # lmdb_path, train/val, RGB
    if opt == 'train':
        lmdb_file = lmdb_path + 'train_lmdb/'
    elif opt == 'val':
        lmdb_file = lmdb_path + 'val_lmdb/'
    lmdb_env = lmdb.open(lmdb_file)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    datum = caffe_pb2.Datum()

    images = []
    labels = []
    for key, value in lmdb_cursor:
        datum.ParseFromString(value)

        label = datum.label
        data = caffe.io.datum_to_array(datum)
        im = data.astype(np.uint8)

        images.append(im)  # add image to array
        labels.append(label)  # add label to array

    return {'images': images, 'labels': labels}
Example #8
def read_images_from_lmdb(db_name, visualize):
    env = lmdb.open(db_name)
    txn = env.begin()
    cursor = txn.cursor()
    X = []
    y = []
    idxs = []
    for idx, (key, value) in enumerate(cursor):
        datum = caffe_pb2.Datum()
        datum.ParseFromString(value)
        X.append(np.array(datum_to_array(datum)).swapaxes(0, 2))
        y.append(datum.label)
        idxs.append(idx)
    if visualize:
        print("Visualizing a few images...")
        for i in range(9):
            img = X[i] ** (1.0 / 8)  # float exponent: 1 / 8 is 0 under Python 2
            plt.subplot(3, 3, i + 1)
            plt.imshow(img)
            plt.title(y[i])
            plt.axis('off')
        plt.show()
    print(" ".join(["Reading from", db_name, "done!"]))
    return X, y, idxs
Example #9
def main():
    """
    从头遍历所有图片
    """
    db = lmdb.open('train_lmdb')
    txn = db.begin()
    cursor = txn.cursor()
    datum = caffe_pb2.Datum()

    cnt = 0
    for key, value in cursor:
        datum.ParseFromString(value)

        label = datum.label
        data = caffe.io.datum_to_array(datum)

        # CxHxW to HxWxC in cv2
        image = np.transpose(data, (1, 2, 0))
        if cnt == 2:
            cv2.imwrite('test.png', image)
            print('{},{}'.format(key, label))
            break
        cnt += 1
        print(cnt)
Example #10
    def get_classes(self):
        """
        :return: the different classes available inside the LMDB file.
        """
        lmdb_env = lmdb.open(self.lmdb_folder)
        lmdb_txn = lmdb_env.begin()
        lmdb_cursor = lmdb_txn.cursor()

        classes = {}

        datum = caffe_pb2.Datum()
        for key, value in lmdb_cursor:
            datum.ParseFromString(value)

            label = datum.label

            if label not in classes:
                classes[label] = 0

            classes[label] += 1

        lmdb_env.close()

        return classes
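A usage sketch; the owning class name and the folder path are assumptions:

# Hypothetical reader class exposing get_classes().
reader = LmdbReader(lmdb_folder='train_lmdb')
for label, count in sorted(reader.get_classes().items()):
    print('class {}: {} samples'.format(label, count))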
def save_to_lmdb(images, labels, lmdb_file):
    if not os.path.exists(lmdb_file):
        batch_size = 256
        lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))
        lmdb_txn = lmdb_env.begin(write=True)
        item_id = 0
        datum = caffe_pb2.Datum()

        for i in range(images.shape[0]):
            im = cv2.imread(images[i])
            if im is None:
                continue
            im = cv2.resize(im, (IM_HEIGHT, IM_WIDTH))
            datum.channels = im.shape[2]
            datum.height = im.shape[0]
            datum.width = im.shape[1]
            datum.data = im.tobytes()
            datum.label = labels[i]
            keystr = '{:0>8d}'.format(item_id)
            lmdb_txn.put(keystr, datum.SerializeToString())

            # write batch
            if (item_id + 1) % batch_size == 0:
                lmdb_txn.commit()
                lmdb_txn = lmdb_env.begin(write=True)
                print('converted {} images'.format(item_id + 1))

            item_id += 1

        # write the last partial batch (item_id equals the number of images written)
        if item_id % batch_size != 0:
            lmdb_txn.commit()
            print('converted {} images'.format(item_id))
        lmdb_env.close()
        print('Generated ' + lmdb_file)
    else:
        print(lmdb_file + ' already exists')
Example #12
def NegativeSampleMining(lmdbWriter, imgData, gtList, stride):
    width = gtList[0][4]
    height = gtList[0][5]
    datum = caffe_pb2.Datum()
    for row in range(0, imgData.size[1] - height - 1, stride):
        for col in range(0, imgData.size[0] - width - 1, stride):
            hit = False
            imgDisp = imgData.copy()
            draw = ImageDraw.Draw(imgDisp)
            b = (col, row, col + width, row + height, width, height)
            for gt in gtList:
                # print col, row, width, height, CalcIoU(a,b)
                # draw.rectangle([(col, row), (col + width, row + height)])
                iou = CalcIoU(gt, b)
                plt.title(iou)
                if (iou > 0.2):
                    hit = True
                    break

            if not hit:
                draw.rectangle([(col, row), (col + width, row + height)])
                count[0] = count[0] + 1  # presumably a module-level one-element counter
                patch = GetCropImage(imgData, 0, (col, row, col + width, row + height))
                lmdbWriter.Put(np.array(patch), 0)
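CalcIoU and GetCropImage are not defined in this snippet; a minimal IoU sketch consistent with the (x1, y1, x2, y2, w, h) tuples built above (the box format is an assumption):

def CalcIoU(a, b):
    # Intersection-over-union for boxes given as (x1, y1, x2, y2, w, h).
    ix = max(0, min(a[2], b[2]) - max(a[0], b[0]))
    iy = max(0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = ix * iy
    union = a[4] * a[5] + b[4] * b[5] - inter
    return float(inter) / union if union > 0 else 0.0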
Example #13
import sys
caffe_root = '/usr/local/caffe/'
sys.path.insert(0, caffe_root + 'python')
import caffe
import lmdb
import numpy as np
from caffe.proto import caffe_pb2
from util import yuv2rgb
from PIL import Image
y_lmdb_file = '/home/jiangliang/code/caffe_colorization/data/flowers/train_y.lmdb'
uv_lmdb_file = '/home/jiangliang/code/caffe_colorization/data/flowers/train_uv.lmdb'
y_lmdb = lmdb.open(y_lmdb_file, map_size=int(1e12))
uv_lmdb = lmdb.open(uv_lmdb_file, map_size=int(1e12))
y_txn = y_lmdb.begin()
y_cursor = y_txn.cursor()
y_datum = caffe_pb2.Datum()

uv_txn = uv_lmdb.begin()
uv_cursor = uv_txn.cursor()
uv_datum = caffe_pb2.Datum()

for key, value in y_cursor:
    print key
    y_datum.ParseFromString(value)
    data = caffe.io.datum_to_array(y_datum)
    # look up the matching UV datum by key; re-iterating uv_cursor inside the
    # outer loop would exhaust it after the first Y image
    uv_datum.ParseFromString(uv_txn.get(key))
    label = caffe.io.datum_to_array(uv_datum)
    yuv = np.zeros((3, 227, 227))
    yuv[0, :, :] = data
    yuv[1:3, :, :] = label
def convertData2Lmdb(datapath):
    datadict = {}
    domnlist = [
        x for x in os.listdir(datapath)
        if os.path.isdir(os.path.join(datapath, x))
    ]
    domnlist.sort()
    for domnitem in domnlist:
        datadict[domnitem] = {}
        labllist = [
            x for x in os.listdir(os.path.join(datapath, domnitem))
            if os.path.isdir(os.path.join(datapath, domnitem, x))
        ]
        labllist.sort()
        for c, lablitem in enumerate(labllist):
            datadict[domnitem][c] = []
            lablpath = os.path.join(datapath, domnitem, lablitem)
            filelist = glob.iglob(os.path.join(lablpath, '*.jpg'))
            filenumb = 0
            for f in filelist:
                img = cv2.imread(f)
                img = cv2.resize(img, (256, 256))
                img = img.transpose((2, 0, 1))
                datadict[domnitem][c].append(img)

    tranplan, testplan = getPlan()
    map_size = 4000 * 196608 * 8
    env = lmdb.open('image_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for domnitem in tranplan:
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in tranplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < tranplan[domnitem][c]:
                    index = counter[domnitem][c] % len(datadict[domnitem][c])
                    data = datadict[domnitem][c][index]
                    datum = caffe_pb2.Datum()
                    datum.channels = 3
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = c
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('train: already handled with {} samples'.format(
                            count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()

    map_size = 2000 * 196608 * 8
    env = lmdb.open('image_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for domnitem in testplan:
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in testplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < testplan[domnitem][c]:
                    index = counter[domnitem][c] % len(datadict[domnitem][c])
                    data = datadict[domnitem][c][index]
                    datum = caffe_pb2.Datum()
                    datum.channels = 3
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = c
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('test: already handled with {} samples'.format(
                            count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()

    tranplan, testplan = getPlan()
    map_size = 4000 * 128 * 8
    env = lmdb.open('image_train_domain_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    counter = {}
    checker = True
    while checker:
        checker = False
        for d, domnitem in enumerate(tranplan):
            if domnitem not in counter:
                counter[domnitem] = {}
            for c in tranplan[domnitem]:
                if c not in counter[domnitem]:
                    counter[domnitem][c] = 0
                if counter[domnitem][c] < tranplan[domnitem][c]:
                    #index = counter[domnitem][c] % len(datadict[domnitem][c]);
                    data = np.array([[[0]]])
                    datum = caffe_pb2.Datum()
                    datum.channels = data.shape[0]  # the placeholder array is 1x1x1, not 3-channel
                    datum.height = data.shape[1]
                    datum.width = data.shape[2]
                    datum.data = data.tostring()
                    datum.label = d
                    str_id = '{:08}'.format(count)
                    txn.put(str_id, datum.SerializeToString())
                    count += 1
                    if count % 1000 == 0:
                        print('train: already handled with {} samples'.format(
                            count))
                        txn.commit()
                        txn = env.begin(write=True)
                    counter[domnitem][c] += 1
                    checker = True
    txn.commit()
    env.close()
import caffe
import lmdb
import cv2
import numpy as np
from caffe.proto import caffe_pb2

lmdb_train = '../data/mnist/mnist_train_lmdb'
lmdb_test = '../data/mnist/mnist_test_lmdb'
lmdb_train_new = '../data/mnist/mnist_500'
# train_cnt_list = [50,50,50,2000,4000,10,100,1000,2000,4000]
train_cnt_list = [50, 50, 50, 50, 50, 50, 50, 50, 50, 50]

# read from the training lmdb
lmdb_env_read = lmdb.open(lmdb_train)
lmdb_txn_read = lmdb_env_read.begin()
lmdb_cursor_read = lmdb_txn_read.cursor()
datum_read = caffe_pb2.Datum()
write_train_lmdb_env = lmdb.open(lmdb_train_new, map_size=int(1e8))
write_train_lmdb_txn = write_train_lmdb_env.begin(write=True)
id_list = [-1] * 10
id_train = -1
id_test = -1
batch_size = 1000

for key, value in lmdb_cursor_read:
    datum_read.ParseFromString(value)
    label = datum_read.label
    data = caffe.io.datum_to_array(datum_read)
    id_list[label] += 1
    if id_list[label] < train_cnt_list[label]:
        id_train += 1
        keystr = '{:0>8d}'.format(id_train)
Example #16
def test():
    import time
    labels = load_label()
    deploy_prototxt = "./models/bvlc_googlenet-modified/deploy-color.prototxt"
    weight_file = "./models/bvlc_googlenet-modified/snapshot/googlenet__iter_55000.caffemodel"
    test_lmdb = lmdb.open("./create_lmdb/test_color_lmdb_200")
    test_lmdb_txn = test_lmdb.begin()
    test_lmdb_cursor = test_lmdb_txn.cursor()
    batch_size = 300
    new_deploy_file = get_dynamic_batch_size(batch_size, deploy_prototxt)
    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(new_deploy_file, weight_file, caffe.TEST)
    datum = caffe_pb2.Datum()
    total, main_acc, sub_acc = 0, 0, 0
    index = 0
    key_name = []
    ground_truth_label = np.zeros((batch_size, ), dtype=np.float32)
    start_time = time.time()
    for key, value in test_lmdb_cursor:
        datum.ParseFromString(value)
        label = datum.label
        key_name.append(key)
        ground_truth_label[index] = label
        img = caffe.io.datum_to_array(datum)
        img = np.transpose(img, (1, 2, 0))
        img = cv2.resize(img, (224, 224)).astype(np.float32)
        img = np.transpose(img, (2, 0, 1))
        #img = img[:,1:225, 1:225].astype(np.float32)
        mean = np.array([126.88187408, 137.67976379, 162.78265381])
        mean = mean[:, np.newaxis, np.newaxis]
        img -= mean
        net.blobs['data'].data[index] = img
        index = index + 1
        total = total + 1
        if index == batch_size:
            print total, time.time()-start_time
            out = net.forward()
            predict_labels = np.argmax(out['prob'], axis=1)
            for idx in range(batch_size):
                if ground_truth_label[idx] == predict_labels[idx]:
                    sub_acc = sub_acc+1
                    main_acc = main_acc+1
                else:
                    label_name = labels[int(ground_truth_label[idx])]  # cast the float32 label before indexing
                    predict_label_name = labels[predict_labels[idx]]
                    if label_name.split('_')[2] == predict_label_name.split('_')[2]:
                        main_acc = main_acc+1
                    predict_log.info('{} {} {} {} {}'.format(
                                                    key_name[idx],
                                                    label_name, predict_label_name,
                                                    label_name.split('_')[2], predict_label_name.split('_')[2]
                                                    )
                                     )
            index = 0
            key_name = []
            
    if index != 0:
        out = net.forward()
        predict_labels = np.argmax(out['prob'], axis=1)
        for idx in range(index):
            if ground_truth_label[idx] == predict_labels[idx]:
                sub_acc = sub_acc+1
                main_acc = main_acc+1
            else:
                label_name = labels[int(ground_truth_label[idx])]  # cast the float32 label before indexing
                predict_label_name = labels[predict_labels[idx]]
                if label_name.split('_')[2] == predict_label_name.split('_')[2]:
                    main_acc = main_acc+1
                predict_log.info('{} {} {} {} {}'.format(
                                                key_name[idx],
                                                label_name, predict_label_name,
                                                label_name.split('_')[2], predict_label_name.split('_')[2]
                                                )
                                 )
    predict_log.info('{} {} {}'.format(total, sub_acc*1.0/total, main_acc*1.0/total))
    print 'finished', time.time()-start_time, batch_size
Example #17
def convertData2Lmdb(datapath):
    datadict = {}
    domnlist = [
        x for x in os.listdir(datapath)
        if os.path.isdir(os.path.join(datapath, x))
    ]
    domnlist.sort()
    for domnitem in domnlist:
        labllist = [
            x for x in os.listdir(os.path.join(datapath, domnitem))
            if os.path.isdir(os.path.join(datapath, domnitem, x))
        ]
        labllist.sort()
        for c, lablitem in enumerate(labllist):
            lablpath = os.path.join(datapath, domnitem, lablitem)
            filelist = glob.iglob(os.path.join(lablpath, '*.jpg'))
            filenumb = 0
            for f in filelist:
                dictkeys = '{}_{}_{}'.format(filenumb, domnitem, c)
                img = cv2.imread(f)
                img = cv2.resize(img, (256, 256))
                img = img.transpose((2, 0, 1))
                datadict[dictkeys] = img
                filenumb = filenumb + 1
    # infodict = getInfoFromDict(datadict)
    # plantupl = getPlanFromDict(infodict)
    tranplan, testplan = getPlan()
    map_size = tranplan[0][0] * 196608 * 8
    env = lmdb.open('image_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan: continue
        if int(keywords[2]) not in tranplan[keywords[1]]: continue
        if tranplan[keywords[1]][int(keywords[2])] == 0: continue
        tranplan[keywords[1]][int(keywords[2])] -= 1
        data = datadict[dictkeys]
        label = int(keywords[2])
        ## data
        datum = caffe_pb2.Datum()
        datum.channels = 3
        datum.height = data.shape[1]
        datum.width = data.shape[2]
        datum.data = data.tostring()
        datum.label = label
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]: break
    txn.commit()
    env.close()

    map_size = testplan[0][0] * 196608 * 8
    env = lmdb.open('image_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in testplan: continue
        if int(keywords[2]) not in testplan[keywords[1]]: continue
        if testplan[keywords[1]][int(keywords[2])] == 0: continue
        testplan[keywords[1]][int(keywords[2])] -= 1
        data = datadict[dictkeys]
        label = int(keywords[2])
        ## data
        datum = caffe_pb2.Datum()
        datum.channels = 3
        datum.height = data.shape[1]
        datum.width = data.shape[2]
        datum.data = data.tostring()
        datum.label = label
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == testplan[0][0]:
            print('test: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == testplan[0][0]: break
    txn.commit()
    env.close()

    dom2numb = {}
    tranplan, testplan = getPlan()
    map_size = tranplan[0][0] * 128 * 8
    env = lmdb.open('image_train_domain_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan: continue
        if int(keywords[2]) not in tranplan[keywords[1]]: continue
        if tranplan[keywords[1]][int(keywords[2])] == 0: continue
        if keywords[1] not in dom2numb:
            numb = len(dom2numb)
            dom2numb[keywords[1]] = numb
        tranplan[keywords[1]][int(keywords[2])] -= 1
        data = [[[]]]
        label = -1
        if dom2numb[keywords[1]] == source_1: label = 0
        if dom2numb[keywords[1]] == source_2: label = 1
        #for dictvals in datadict[dictkeys] :
        #for dataelem in dictvals :
        #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
        #data[0][0].append(float(dataelem))
        data[0][0].append(float(dom2numb[keywords[1]]))
        dataarr = np.array(data)
        datum = caffe_pb2.Datum()
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]: break
    txn.commit()
    env.close()
def main(argv):
    db_path_label = ''
    db_path_feats = ''
    mat_file = ''
    print argv
    try:
        # note "m:" — the original "t:l:r:g:m" left -m without an argument
        opts, args = getopt.getopt(argv, "t:l:r:g:m:", [
            "test_label_db=", "test_feature_db=", "train_label_db=",
            "train_feature_db=", "mat_file="
        ])
    except getopt.GetoptError:
        print 'feature_LDB_to_mat.py -t <test_label_db> -l <test_feature_db> -r <train_label_db> -g <train_feature_db> -m <output_mat_file>'
        sys.exit(2)

    print opts
    print args

    for opt, arg in opts:
        if opt in ("-t", "--test_label_db"):
            db_path_test_label = arg
        elif opt in ("-l", "--test_feature_db"):
            db_path_test_feats = arg
        if opt in ("-r", "--train_label_db"):
            db_path_train_label = arg
        elif opt in ("-g", "--train_feature_db"):
            db_path_train_feats = arg
        elif opt in ("-m", "--mat_file"):
            mat_file = arg
        #print arg+" "+opt

    #print(db_path_label)
    #print(db_path_feats)
    #print(mat_file)

    if not os.path.exists(db_path_test_label):
        raise Exception('db test label not found')
    if not os.path.exists(db_path_test_feats):
        raise Exception('db test feature not found')
    if not os.path.exists(db_path_train_label):
        print 'db_path_train_label is: ' + db_path_train_label
        raise Exception('db train label not found')
    if not os.path.exists(db_path_train_feats):
        raise Exception('db train feature not found')

    db_test_label = leveldb.LevelDB(db_path_test_label)
    db_test_feats = leveldb.LevelDB(db_path_test_feats)
    db_train_label = leveldb.LevelDB(db_path_train_label)
    db_train_feats = leveldb.LevelDB(db_path_train_feats)
    #window_num =686
    datum = caffe_pb2.Datum()
    datum_lb = caffe_pb2.Datum()
    start = time.time()
    #ft = np.zeros((window_num, float(81)))
    #ft = np.zeros((window_num, float(100352)))
    #lb = np.zeros((window_num, float(81)))
    window_num = 0
    for key in db_test_feats.RangeIter(include_value=False):
        window_num = window_num + 1

    n = 0
    for key, value in db_test_feats.RangeIter():
        n = n + 1
        #f_size=len(value)
        datum.ParseFromString(value)
        f_size = len(datum.float_data)
        if n > 0:
            break
    n = 0
    for key, value in db_test_label.RangeIter():
        n = n + 1
        #l_size=len(value)
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
        if n == 1:
            break
    te_ft = np.zeros((window_num, f_size))
    te_lb = np.zeros((window_num, l_size))

    window_num = 0
    for key in db_train_feats.RangeIter(include_value=False):
        window_num = window_num + 1
    n = 0
    for key, value in db_train_feats.RangeIter():
        n = n + 1
        #f_size=len(value)
        datum.ParseFromString(value)
        f_size = len(datum.float_data)
        if n > 0:
            break
    n = 0
    for key, value in db_train_label.RangeIter():
        n = n + 1
        #l_size=len(value)
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
        if n == 1:
            break
    tr_ft = np.zeros((window_num, f_size))
    tr_lb = np.zeros((window_num, l_size))

    # for im_idx in range(window_num):
    count = 0
    for key in db_test_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_test_feats.Get(key))
        datum_lb.ParseFromString(db_test_label.Get(key))
        te_ft[count, :] = datum.float_data
        te_lb[count, :] = datum_lb.float_data
        count = count + 1
        print 'convert feature # : %d key is %s' % (count, key)

    count = 0  # dedented out of the loop: resetting inside it overwrote te_ft[0] every iteration
    for key in db_train_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_train_feats.Get(key))
        datum_lb.ParseFromString(db_train_label.Get(key))
        tr_ft[count, :] = datum.float_data
        tr_lb[count, :] = datum_lb.float_data
        count = count + 1

    print 'time 1: %f' % (time.time() - start)
    prob = problem(tr_lb[1, :], tr_ft)
    m = train(prob, '-c 4')
    p_label, p_acc, p_val = predict(te_lb, te_ft, m)  # te_ft, not the undefined name te
    print 'done!'
Example #19
import lmdb
import numpy as np
from caffe.proto import caffe_pb2



npy_path = '/home/zhaoliu/car_brand/lmdb_data_new/train.npy'
npy_path_label = '/home/zhaoliu/car_brand/datasets_new/tongji/train_tongji.npy'
lmdb_path = "/mnt/disk/zhaoliu_data/small_car_lmdb/train_val_lmdb"

txt = '/home/zhaoliu/car_brand/lmdb_data_new/weight_txt/weight.txt'

lmdb_env = lmdb.open(lmdb_path)
lmdb_txn = lmdb_env.begin()
datum = caffe_pb2.Datum()

keys_list = np.load(npy_path).tolist()
label_list = np.load(npy_path_label).tolist()


f = open(txt, 'a')

# i = 0

txt_list = []

print(len(keys_list))
print(len(label_list))

# for i in range(len(keys_list)):
Example #20
def generate_arrays_from_file(params_transform, params_train):

    path = params_train['lmdb_path']
    batch_size = params_train['batch_size']

    lmdb_env = lmdb.open(path, readonly=True)
    with lmdb_env.begin() as lmdb_txn:
        lmdb_cursor = lmdb_txn.cursor()
        datum = caffe_pb2.Datum()
        cnt = 0
        X = []
        Y = []
        GT = []
        # print('================================  load a new batch  \n')
        while True:
            lmdb_cursor.first()
            for idx, (key, value) in enumerate(lmdb_cursor):

                datum.ParseFromString(value)
                label = datum.label
                data = caffe.io.datum_to_array(datum)

                cocoImg = COCOLmdb(data, params_transform)
                # cocoImg.set_meta_data()
                cocoImg.add_neck()
                # cocoImg.visualize()
                cocoImg.aug_scale()
                # cocoImg.visualize()
                cocoImg.aug_croppad()
                # cocoImg.visualize()
                cocoImg.aug_flip()
                # cocoImg.visualize()
                cocoImg.set_ground_truth()
                # cocoImg.visualize_pafs_single_figure()
                sample, label, gt = cocoImg.get_sample_label()
                # print(sample.shape,label.shape)
                # cocoImg.visualize_heat_maps()
                # cocoImg.visualize()
                # cocoImg.aug_scale()
                # cocoImg.aug_croppad()
                # img,anno,mask_miss,mask_all = cocoImg.get_meta_data()
                # print(type(img))
                # img = np.rollaxis(img,0,3)

                # cocoImg.add_neck()
                # pprint.pprint(cocoImg.anno)
                # visualize_body_part(img,anno['joint_others'])
                # print(anno['joint_others'].shape)
                # cocoImg.aug_scale()
                # cocoImg.visualize()
                X.append(sample)
                Y.append(label)
                gt = np.zeros((1, ))
                GT.append(gt)
                cnt += 1
                if cnt == batch_size:
                    cnt = 0
                    X = np.array(X)
                    Y = np.array(Y)
                    # GT = np.array(GT)
                    # GT = np.zeros((1,1,1,1))
                    # GTs = [GT for i in range(6)]
                    GT = np.array(GT)
                    GTs = [GT for i in range(6)]
                    # print(X.shape,Y.shape)
                    yield (dict(image=X, label=Y), GTs)
                    X = []
                    Y = []
                    GT = []
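The generator yields (inputs, targets) batches indefinitely, presumably for a Keras-style fit_generator loop; a hedged consumption sketch (the parameter dicts must match whatever COCOLmdb expects):

gen = generate_arrays_from_file(params_transform, params_train)
inputs, targets = next(gen)
print(inputs['image'].shape, inputs['label'].shape, len(targets))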
test_labels = np.array(test_labels)
print("\t\t" + str(len(test_labels)) + " labels successfully loaded.\n")


#######################
##   Load Features   ##
#######################

#train
print("Loading features from train_"+layer+"_"+net+ " database...")
db1 = lmdb.open("train_"+layer+"_"+net)
txn1 = db1.begin()
kvpairs1 = list(txn1.cursor().iternext(keys=True, values=True))

blob1 = cpb.Datum()
for key, value in kvpairs1:
    blob1.ParseFromString(value)
    feature_vector = np.array(blob1.float_data)
    train_features.append(feature_vector)

train_features= np.array(train_features)        #converting back to np array
print("\t\tfeatures successfully saved!\n")

#test
print("Loading features from test_"+layer+"_"+net+ " database...")
db2 = lmdb.open("test_"+layer+"_"+net)
txn2 = db2.begin()
kvpairs2 = list(txn2.cursor().iternext(keys=True, values=True))

blob2 = cpb.Datum()
Example #22
def convertData2Lmdb(datapath):
    datadict = sio.loadmat(datapath)
    datadict.pop('__version__')
    datadict.pop('__header__')
    datadict.pop('__globals__')
    infodict = getInfoFromDict(datadict)
    plantupl = getPlanFromDict(infodict)
    #scaltupl = getScalFromPlan(datadict, plantupl)
    #seeScalFromTupl(scaltupl)
    #datamini = scaltupl[0][4]
    #datamaxi = scaltupl[1][4]
    #usermini = int(input('please input the minimal value for scaling: '))
    #usermaxi = int(input('please input the maximal value for scaling: '))

    tranplan, testplan = cpyPlanFromPlan(plantupl)
    map_size = tranplan[0][0] * 20659 * 8
    env = lmdb.open('gene_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in tranplan: continue
        if keywords[2] not in tranplan[keywords[1]]: continue
        if tranplan[keywords[1]][keywords[2]] == 0: continue
        tranplan[keywords[1]][keywords[2]] -= 1
        data = [[[]]]
        label = 0 if keywords[2] == 'F' else 1
        for dictvals in datadict[dictkeys]:
            for dataelem in dictvals:
                #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
                data[0][0].append(float(dataelem))
        dataarr = np.array(data)
        datum = caffe_pb2.Datum()
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == tranplan[0][0]:
            print('train: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == tranplan[0][0]: break
    txn.commit()
    env.close()

    map_size = testplan[0][0] * 20659 * 8
    env = lmdb.open('gene_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for dictkeys in datadict:
        keywords = dictkeys.split('_')
        if keywords[1] not in testplan: continue
        if keywords[2] not in testplan[keywords[1]]: continue
        if testplan[keywords[1]][keywords[2]] == 0: continue
        testplan[keywords[1]][keywords[2]] -= 1
        data = [[[]]]
        label = 0 if keywords[2] == 'F' else 1
        for dictvals in datadict[dictkeys]:
            for dataelem in dictvals:
                #dataelem = (dataelem - datamini) / (datamaxi - datamini) * (usermaxi - usermini) + usermini
                data[0][0].append(float(dataelem))
        dataarr = np.array(data)
        datum = caffe_pb2.Datum()
        datum = caffe.io.array_to_datum(dataarr, label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())
        count += 1
        if count % 1000 == 0 or count == testplan[0][0]:
            print('test: already handled with {} samples'.format(count))
            txn.commit()
            txn = env.begin(write=True)
        if count == testplan[0][0]: break
    txn.commit()
    env.close()
def main(leveldb_dir, limit):
    datum = caffe_pb2.Datum()
    db = leveldb.LevelDB(leveldb_dir)
    for i in range(0, limit):
        datum.ParseFromString(db.Get(str(i)))
        print datum.float_data, datum.label
Example #24
def find_image_dimension(db):
    # `db` was originally named `leveldb`, shadowing the module name;
    # .iterator() is the plyvel-style API (py-leveldb exposes RangeIter instead)
    _, v = db.iterator().next()
    datum = caffe_pb2.Datum()
    datum.ParseFromString(v)

    return datum.height, datum.width, datum.channels
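A usage sketch, assuming a plyvel-style handle (the database path is hypothetical):

import plyvel

db = plyvel.DB('train_leveldb')
h, w, c = find_image_dimension(db)
print('images are {}x{} with {} channels'.format(h, w, c))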
def main():
    train = sio.loadmat('data_svhn/train_32x32.mat')
    test = sio.loadmat('data_svhn/test_32x32.mat')

    train_data = train['X']
    train_label = train['y']
    test_data = test['X']
    test_label = test['y']

    # HxWxCxN -> NxCxHxW
    train_data = np.swapaxes(train_data, 0, 3)
    train_data = np.swapaxes(train_data, 1, 2)
    train_data = np.swapaxes(train_data, 2, 3)

    test_data = np.swapaxes(test_data, 0, 3)
    test_data = np.swapaxes(test_data, 1, 2)
    test_data = np.swapaxes(test_data, 2, 3)

    N = train_label.shape[0]
    map_size = train_data.nbytes * 10
    env = lmdb.open('svhn_train_lmdb', map_size=map_size)
    txn = env.begin(write=True)

    # shuffle the training data
    r = list(range(N))
    random.shuffle(r)

    count = 0
    for i in r:
        label = int(train_label[i][0])
        if label == 10:
            label = 0  # SVHN uses 10 for the digit zero
        datum = caffe.io.array_to_datum(train_data[i], label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())

        count += 1
        if count % 1000 == 0:
            print('already handled with {} pictures'.format(count))
            txn.commit()
            txn = env.begin(write=True)

    txn.commit()
    env.close()

    map_size = test_data.nbytes * 10
    env = lmdb.open('svhn_test_lmdb', map_size=map_size)
    txn = env.begin(write=True)
    count = 0
    for i in range(test_label.shape[0]):
        datum = caffe_pb2.Datum()
        label = int(test_label[i][0])
        if label == 10:
            label = 0
        datum = caffe.io.array_to_datum(test_data[i], label)
        str_id = '{:08}'.format(count)
        txn.put(str_id, datum.SerializeToString())

        count += 1
        if count % 1000 == 0:
            print('already handled with {} pictures'.format(count))
            txn.commit()
            txn = env.begin(write=True)

    txn.commit()
    env.close()
Example #26
def make_datum(img, label):
    return caffe_pb2.Datum(channels=3,
                           width=SUB_W,
                           height=SUB_H,
                           label=label,
                           data=np.rollaxis(img, 2).tostring())
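A hedged sketch of writing the resulting datum into an LMDB (the image path, key, and map size are assumptions; SUB_W/SUB_H come from the surrounding script):

import cv2
import lmdb

img = cv2.resize(cv2.imread('patch.jpg'), (SUB_W, SUB_H))
env = lmdb.open('patch_lmdb', map_size=int(1e9))
with env.begin(write=True) as txn:  # the transaction commits on exit
    txn.put(b'00000000', make_datum(img, 0).SerializeToString())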
Example #27
mkdir_if_missing(lmdb_file)
batch_size = 200  # lmdb caches writes and commits them in one batch for efficiency, so batch_size controls how many records are written per commit
channel = 6
resize_height = 160
resize_width = 72

# get file_name from txt
#txt_file = '/home/jiening/dgd_person_reid/external/exp/db/SYSU_3/val.txt'
txt_file = '/home/jiening/dgd_person_reid/external/exp/db/SYSU_3/train.txt'
data_b_path = '/home/jiening/dgd_person_reid/external/exp/datasets/SYSU_3_b_x2'
data_path = '/home/jiening/dgd_person_reid/external/exp/datasets/SYSU_3'

# create the lmdb file
lmdb_env = lmdb.open(lmdb_file, map_size=int(1e12))  # create a database file with a maximum map size
lmdb_txn = lmdb_env.begin(write=True)  # open a write transaction handle
datum = caffe_pb2.Datum()  # Datum is Caffe's serialized record type

f = open(txt_file, 'r')
file_length = len(f.readlines())
print(file_length)
f.close()

# img_new = np.zeros((channel, resize_height, resize_width))
f = open(txt_file, 'r')
count = 0
count_cam1 = 0
for line in f:  # iterate the handle opened above instead of opening the file a second time
    count += 1
    # print line
    line = line.split(' ')  # divide the file_name and label
Example #28
def make_datum(img, label):
    return caffe_pb2.Datum(channels=3,
                           width=IMAGE_WIDTH,
                           height=IMAGE_HEIGHT,
                           label=label,
                           data=np.rollaxis(img, 2).tostring())
def convert_imageid2shapeid(datum_string, def_param=imageid2shapeid_mapping):
    datum = caffe_pb2.Datum()
    datum.ParseFromString(datum_string)
    datum.label = imageid2shapeid_mapping[datum.label]
    assert datum.label != -1
    return datum.SerializeToString()
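A sketch of applying the remapping across an existing database (source and destination paths are hypothetical):

import lmdb

src = lmdb.open('image_lmdb', readonly=True)
dst = lmdb.open('shape_lmdb', map_size=src.info()['map_size'])
with src.begin() as rtxn, dst.begin(write=True) as wtxn:
    for key, value in rtxn.cursor():
        wtxn.put(key, convert_imageid2shapeid(value))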
def main(argv):
    db_path_label = ''
    db_path_feats = ''
    mat_file = ''
    print argv
    try:
        opts, args = getopt.getopt(argv, "l:f:o",
                                   ["label_db=", "feature_db=", "mat_file="])
    except getopt.GetoptError:
        print 'feature_LDB_to_mat.py -l <label_db> -f <feature_db> -m <output_mat_file>'
        sys.exit(2)

    print opts
    print args

    for opt, arg in opts:
        if opt in ("-l", "--label_db"):
            db_path_label = arg
        elif opt in ("-f", "--feature_db"):
            db_path_feats = arg
        elif opt in ("-o", "--mat_file"):
            mat_file = arg
        print arg + " " + opt

    print(db_path_label)
    print(db_path_feats)
    print(mat_file)

    if not os.path.exists(db_path_label):
        raise Exception('db label not found')
    if not os.path.exists(db_path_feats):
        raise Exception('db feature not found')

    db_label = leveldb.LevelDB(db_path_label)
    db_feats = leveldb.LevelDB(db_path_feats)
    #window_num =686
    datum = caffe_pb2.Datum()
    # print 'show datum: ' + str(len(datum))
    datum_lb = caffe_pb2.Datum()
    start = time.time()
    #ft = np.zeros((window_num, float(81)))
    #ft = np.zeros((window_num, float(100352)))
    #lb = np.zeros((window_num, float(81)))
    is_float_data = True
    window_num = 0
    for key in db_feats.RangeIter(include_value=False):
        window_num = window_num + 1

    print 'window_num = ' + str(window_num)
    n = 0
    for key, value in db_feats.RangeIter():
        n = n + 1
        if n > 1:
            break
        #f_size=len(value)
        datum.ParseFromString(db_feats.Get(key))
        f_size = len(datum.float_data)
        # print 'show datum: ' + str(len(datum.float_data))
        if f_size == 0:
            f_size = len(datum.data)
            is_float_data = False
        print f_size

    n = 0
    for key, value in db_label.RangeIter():
        n = n + 1
        if n > 1:
            break
        #l_size=len(value)
        datum.ParseFromString(value)
        l_size = len(datum.float_data)
    ft = np.zeros((window_num, f_size))
    lb = np.zeros((window_num, l_size))
    #ft = np.zeros((10, float(f_size)))
    #lb = np.zeros((10, float(l_size)))
    count = 0
    for key in db_feats.RangeIter(include_value=False):
        datum.ParseFromString(db_feats.Get(key))
        datum_lb.ParseFromString(db_label.Get(key))
        print 'show label'
        print datum_lb
        if f_size > 0:
            if is_float_data:
                ft[count, :] = datum.float_data
            else:
                # datum.data is raw bytes; decode as uint8 first (the dtype is an assumption)
                ft[count, :] = np.frombuffer(datum.data, dtype=np.uint8)
        lb[count, :] = datum_lb.float_data
        print 'convert feature # : %d key is %s' % (count, key)
        count = count + 1
    #print 'show datum: ' + str(len(datum.data))
    #print 'show datum: ' + str(len(datum.float_data))

    print 'time 1: %f' % (time.time() - start)
    data = {
        u'feat_label': {
            u'feat': ft,
            u'label': lb,
        }
    }
    print 'save result to : %s' % (mat_file)
    hdf5storage.savemat(mat_file, data, format='7.3')
    print 'time 2: %f' % (time.time() - start)
    print 'done!'