Exemplo n.º 1
0
    def __init__(self, params):

        self.batch_size = params['batch_size']
        self.outshape = params['shape']
        self.classes_per_batch = params['classes_per_batch']

        self.img_lmdb = lmdbs(params['img_source'])
        if params['skt_source'].endswith('.pkl'):
            self.skt_lmdb = svgs(params['skt_source'])
        else:
            self.skt_lmdb = lmdbs(params['skt_source'])
        self.img_labels = self.img_lmdb.get_label_list()
        self.skt_labels = self.skt_lmdb.get_label_list()
        self.img_mean = biproto2py(params['mean_file']).squeeze()

        self.num_classes = len(set(self.skt_labels))
        assert len(self.skt_labels)%self.num_classes==0, \
          'Unequal sketch training samples for each class'
        self.skt_per_class = len(self.skt_labels) / self.num_classes

        if 'hard_pos' in params:
            self.hard_sel = 'hard_pos'
            self.hard_sel_file = params['hard_pos']
        elif 'hard_neg' in params:
            self.hard_sel = 'hard_neg'
            self.hard_sel_file = params['hard_neg']
        elif 'hard_pn' in params:
            self.hard_sel = 'hard_pn'
            self.hard_sel_file = params['hard_pn']
        else:
            assert False, 'Hard selection must be on'

        self.watchChange = WatchDog(
            self.hard_sel_file)  #check if file has been updated
        self.hardsel_tab = np.load(self.hard_sel_file)
        self.hard_pos = self.hardsel_tab['pos']
        self.hard_neg = self.hardsel_tab['neg']

        self.img_labels_dict, self.classes = vec2dic(self.img_labels)
        self.NSKTS = len(self.skt_labels)
        self.indexlist = range(self.NSKTS)
        self.shuffle_keeping_min_classes_per_batch()
        self._cur = 0  # current image

        # this class does some simple data-manipulations
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=params['shape'],
                                         scale=params['scale'],
                                         rot=params['rot'])

        if 'verbose' not in params:
            print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
                len(self.skt_labels), len(self.img_labels), self.num_classes)
            print('Hard selection: {}'.format(self.hard_sel))
        #create threadpools for parallel augmentation
        self.pool = ThreadPool()  #4
Exemplo n.º 2
0
    def __init__(self, params):

        self.batch_size = params['batch_size']
        self.outshape = params['shape']
        self.classes_per_batch = params['classes_per_batch']

        self.img_lmdb = lmdbs(params['img_source'])
        if params['skt_source'].endswith('.pkl'):
            self.skt_lmdb = svgs(params['skt_source'])
        else:
            self.skt_lmdb = lmdbs(params['skt_source'])
        self.img_labels = self.img_lmdb.get_label_list()
        self.skt_labels = self.skt_lmdb.get_label_list()
        self.img_mean = biproto2py(params['mean_file']).squeeze()

        self.num_classes = len(set(self.skt_labels))
        assert len(self.skt_labels)%self.num_classes==0, \
          'Unequal sketch training samples for each class'
        self.skt_per_class = len(self.skt_labels) / self.num_classes

        self.hard_sel = 0

        self.img_labels_dict, self.classes = vec2dic(self.img_labels)

        self.indexlist = range(len(self.skt_labels))
        self.shuffle_keeping_min_classes_per_batch()
        self._cur = 0  # current image

        # this class does some simple data-manipulations
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=params['shape'],
                                         scale=params['scale'],
                                         rot=params['rot'])

        if 'verbose' not in params:
            print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
                len(self.skt_labels), len(self.img_labels), self.num_classes)
        #create threadpools for parallel augmentation
        self.pool = ThreadPool()  #4
 def __init__(self, img_dir='', img_lst='', lmdb=''):
     """Record which data sources are configured: an image list file,
     an image directory and/or an lmdb.  A have_* flag marks each one
     that was actually provided; all default to absent."""
     self.have_img_lst = False
     self.have_img_dir = False
     self.have_lmdb = False
     self.hardsel = None
     if img_lst:
         reader = helper()
         self.img_lst = reader.read_list(img_lst,
                                         delimeter=',',
                                         keep_original=False)
         self.have_img_lst = True
     if img_dir:
         assert os.path.isdir(img_dir), 'Opps. img_dir is not a dir'
         self.img_dir = img_dir
         self.have_img_dir = True
     if lmdb:
         self.lmdb = lmdbs(lmdb)
         self.have_lmdb = True
Exemplo n.º 4
0
 def __init__(self, params):
   """Sequential batch loader over a single lmdb.

   Expected params keys: batch_size, shape, source, mean_file, scale.
   The last batch may be partial (ceiling division below).
   """
   self.batch_size = params['batch_size']
   self.outshape = params['shape']

   self.lmdb = lmdbs(params['source'])
   self.labels = self.lmdb.get_label_list()
   self.img_mean = biproto2py(params['mean_file']).squeeze()

   self.NIMGS = len(self.labels)

   # Ceiling division so a trailing partial batch is still counted.
   self.num_batches = int(np.ceil(self.NIMGS / float(self.batch_size)))
   self._cur = 0  # current batch

   # Augmentation helper: mean subtraction, resize and scaling.
   self.img_augment = SimpleAugment(mean=self.img_mean, shape=params['shape'],
                                    scale=params['scale'])
Exemplo n.º 5
0
 def __init__(self, params):
   """Batch loader over a single lmdb requiring an exact batch split.

   Expected params keys: batch_size, shape, source, mean_file, scale.
   Asserts that the sample count divides evenly by batch_size, then
   precomputes a (num_batches, batch_size) table of per-batch labels.
   """
   self.batch_size = params['batch_size']
   self.outshape = params['shape']

   self.lmdb = lmdbs(params['source'])
   self.labels = self.lmdb.get_label_list()
   self.img_mean = biproto2py(params['mean_file']).squeeze()

   self.NIMGS = len(self.labels)
   assert self.NIMGS%self.batch_size==0,'NIMGS {} not dividible by batchsize {}'.format(
          self.NIMGS,self.batch_size)

   self.num_batches = self.NIMGS / self.batch_size
   self._cur = 0  # current batch
   # Row i of the table holds the labels belonging to batch i.
   self.labels_tab = self.labels.reshape((self.num_batches, self.batch_size))

   # Augmentation helper: mean subtraction, resize and scaling.
   self.img_augment = SimpleAugment(mean=self.img_mean, shape=params['shape'],
                                    scale=params['scale'])
Exemplo n.º 6
0
def lmdb_get_info(DB):
    """
  get information about an lmdb
  IN:   DB    can be either a true lmdb or a python pickle
  includes: number of classes, number of sample per class, number of samples
  """
    if DB.endswith('.pkl'):
        helps = helper()
        data = helps.load(DB, 1)  #load the first variable which is the labels
        labels = data['labels']
    else:
        lmdb_ = lmdbs(DB)
        labels = lmdb_.get_label_list()
    out = {}
    out['num_classes'] = len(set(labels))
    out['num_samples'] = len(labels)
    if out['num_samples'] % out['num_classes'] != 0:
        print 'We got an unbalance lmdb having {} samples of {} classes'.format(\
          out['num_samples'],out['num_classes'])
    out['samples_per_class'] = out['num_samples'] / out['num_classes']
    return out
Exemplo n.º 7
0
    def __init__(self, params):

        self.batch_size = params['batch_size']
        self.img_shape = params['shape']
        self.classes_per_batch = params['classes_per_batch']

        self.img_lmdb = lmdbs(params['img_source'])
        if params['skt_source'].endswith('.pkl'):
            self.skt_lmdb = svgs(params['skt_source'])
        else:
            self.skt_lmdb = lmdbs(params['skt_source'])
        self.img_labels = self.img_lmdb.get_label_list()
        self.skt_labels = self.skt_lmdb.get_label_list()
        label_ids = list(set(self.img_labels))
        NCATS = len(label_ids)
        if label_ids[0] != 0 or label_ids[-1] != NCATS - 1:
            if 'verbose' not in params:
                print 'Your data labels are not [0:{}]. Converting label ...'.format(
                    NCATS - 1)
            self.img_labels = [
                label_ids.index(label) for label in self.img_labels
            ]
            self.skt_labels = [
                label_ids.index(label) for label in self.skt_labels
            ]

        self.img_mean = biproto2py(params['mean_file']).squeeze()
        #self.skt_mean = biproto2py(params['skt_mean']).squeeze()

        self.num_classes = len(set(self.skt_labels))
        assert self.num_classes == NCATS, 'XX!!Sketch & image datasets unequal #categories'
        assert len(self.skt_labels)%self.num_classes==0, \
          'Unequal sketch training samples for each class'
        self.skt_per_class = len(self.skt_labels) / self.num_classes

        if 'hard_pos' in params:
            self.hard_sel = 1
            self.hard_pos = np.load(params['hard_pos'])['pos']
        elif 'hard_neg' in params:
            self.hard_sel = 2
            self.hard_neg = np.load(params['hard_neg'])['neg']
        elif 'hard_pn' in params:
            self.hard_sel = 3
            tmp = np.load(params['hard_pn'])
            self.hard_pos = tmp['pos']
            self.hard_neg = tmp['neg']
        else:  #hard selection turn off
            self.hard_sel = 0

        #self.img_labels_dict, self.classes = vec2dic(self.img_labels)

        self.indexlist = range(len(self.skt_labels))
        self.indexlist_img = range(len(self.img_labels))
        #self.shuffle_keeping_min_classes_per_batch()
        shuffle(self.indexlist)
        shuffle(self.indexlist_img)
        self._cur = 0  # current image
        self._cur_img = 0

        # this class does some simple data-manipulations
        self.img_augment = SimpleAugment(mean=self.img_mean,
                                         shape=self.img_shape,
                                         scale=params['scale'],
                                         rot=params['rot'])

        print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
            len(self.skt_labels), len(self.img_labels), self.num_classes)
        #create threadpools for parallel augmentation
        self.pool = ThreadPool()  #4
Exemplo n.º 8
0
def extract_cnn_feat(net_params, DB, OUT, layer=0, verbose=True):
    """
  extract features from CNN

  DB: lmdb data you want to extract feature
  net_params: dictionary with keys "DEPLOY_PRO","data_mean",
    "WEIGHTS","scale_factor", batch_size
  OUT: save output in mat file
  layer: 0 for last layer, -1: one before the last layer, -2: ...
  """
    assert layer <= 0, 'layer should be a non-positive integer'
    DEPLOY_PRO = net_params['DEPLOY_PRO']
    WEIGHTS = net_params['WEIGHTS']
    scale_factor = net_params['scale_factor']
    data_mean = net_params['data_mean']
    batch_size = net_params['batch_size']

    net = caffe.Net(DEPLOY_PRO, WEIGHTS, caffe.TEST)
    if verbose:
        print 'Extracting cnn feats...'
        print '  Model def: {}\n  Weights: {}'.format(DEPLOY_PRO, WEIGHTS)
        start_t = time.time()
    db = lmdbs(DB)
    labels = db.get_label_list()
    NIMGS = labels.size

    img_mean = biproto2py(data_mean)
    inblob = net.inputs[0]
    in_dim = net.blobs[inblob].data.shape[1:]
    prep = SimpleAugment(mean=img_mean, shape=in_dim[-2:])

    feat_l = net.blobs.keys()[layer - 1]
    out_dim = net.blobs[feat_l].data.squeeze().shape[-1]
    feats = np.zeros((NIMGS, out_dim), dtype=np.float32)

    for i in xrange(0, NIMGS, batch_size):
        batch = range(i, min(i + batch_size, NIMGS))
        if verbose:
            print('Processing sample #{} - {}'.format(batch[0], batch[-1]))
        new_shape = (len(batch), ) + in_dim
        net.blobs[inblob].reshape(*new_shape)

        chunk = db.get_data(batch)
        net.blobs[inblob].data[...] = prep.augment_deploy(chunk)
        temp = net.forward()
        feats[batch] = net.blobs[feat_l].data.squeeze()

    #apply scale factor
    feats *= scale_factor

    if OUT.endswith('.mat'):
        py2mat(dict(feats=feats, labels=labels), OUT)
    elif OUT.endswith('.npz'):
        np.savez(OUT, feats=feats, labels=labels)
    else:  #assume it is pickle
        helps = helper()
        helps.save(OUT, feats=feats, labels=labels)
    net = None
    if verbose:
        end_t = time.time()
        print 'Save features to {}.'.format(OUT)
        print 'Time: {}\n'.format(timedelta(seconds=int(end_t - start_t)))