def __init__(self, params):
    """Set up a sketch/image batch loader that REQUIRES hard-example selection.

    params keys read here: 'batch_size', 'shape', 'classes_per_batch',
    'img_source', 'skt_source', 'mean_file', 'scale', 'rot', and exactly one
    of 'hard_pos' / 'hard_neg' / 'hard_pn' (asserted below).  Optional key
    'verbose': when ABSENT the init summary is printed.
    """
    self.batch_size = params['batch_size']
    self.outshape = params['shape']
    self.classes_per_batch = params['classes_per_batch']
    self.img_lmdb = lmdbs(params['img_source'])
    # sketches may come from an svg pickle or a true lmdb
    if params['skt_source'].endswith('.pkl'):
        self.skt_lmdb = svgs(params['skt_source'])
    else:
        self.skt_lmdb = lmdbs(params['skt_source'])
    self.img_labels = self.img_lmdb.get_label_list()
    self.skt_labels = self.skt_lmdb.get_label_list()
    self.img_mean = biproto2py(params['mean_file']).squeeze()
    self.num_classes = len(set(self.skt_labels))
    assert len(self.skt_labels)%self.num_classes==0, \
        'Unequal sketch training samples for each class'
    # Python-2 integer division: samples per class
    self.skt_per_class = len(self.skt_labels) / self.num_classes
    # exactly one hard-selection mode must be supplied; hard_sel stores the
    # mode name, hard_sel_file the path to the selection table
    if 'hard_pos' in params:
        self.hard_sel = 'hard_pos'
        self.hard_sel_file = params['hard_pos']
    elif 'hard_neg' in params:
        self.hard_sel = 'hard_neg'
        self.hard_sel_file = params['hard_neg']
    elif 'hard_pn' in params:
        self.hard_sel = 'hard_pn'
        self.hard_sel_file = params['hard_pn']
    else:
        assert False, 'Hard selection must be on'
    self.watchChange = WatchDog( self.hard_sel_file) #check if file has been updated
    self.hardsel_tab = np.load(self.hard_sel_file)
    # NOTE(review): both 'pos' and 'neg' are loaded regardless of the mode —
    # assumes the file always contains both arrays; confirm against the writer
    self.hard_pos = self.hardsel_tab['pos']
    self.hard_neg = self.hardsel_tab['neg']
    self.img_labels_dict, self.classes = vec2dic(self.img_labels)
    self.NSKTS = len(self.skt_labels)
    self.indexlist = range(self.NSKTS)
    self.shuffle_keeping_min_classes_per_batch()
    self._cur = 0  # current image
    # this class does some simple data-manipulations
    self.img_augment = SimpleAugment(mean=self.img_mean, shape=params['shape'],
                                     scale=params['scale'], rot=params['rot'])
    if 'verbose' not in params:
        print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
            len(self.skt_labels), len(self.img_labels), self.num_classes)
        print('Hard selection: {}'.format(self.hard_sel))
    #create threadpools for parallel augmentation
    self.pool = ThreadPool() #4
def __init__(self, params):
    """Set up a sketch/image batch loader WITHOUT hard-example selection.

    params keys read here: 'batch_size', 'shape', 'classes_per_batch',
    'img_source', 'skt_source', 'mean_file', 'scale', 'rot'.  Optional key
    'verbose': when ABSENT the init summary is printed.
    """
    self.batch_size = params['batch_size']
    self.outshape = params['shape']
    self.classes_per_batch = params['classes_per_batch']
    self.img_lmdb = lmdbs(params['img_source'])
    # sketches may come from an svg pickle or a true lmdb
    if params['skt_source'].endswith('.pkl'):
        self.skt_lmdb = svgs(params['skt_source'])
    else:
        self.skt_lmdb = lmdbs(params['skt_source'])
    self.img_labels = self.img_lmdb.get_label_list()
    self.skt_labels = self.skt_lmdb.get_label_list()
    self.img_mean = biproto2py(params['mean_file']).squeeze()
    self.num_classes = len(set(self.skt_labels))
    assert len(self.skt_labels)%self.num_classes==0, \
        'Unequal sketch training samples for each class'
    # Python-2 integer division: samples per class
    self.skt_per_class = len(self.skt_labels) / self.num_classes
    # hard selection disabled in this loader variant
    self.hard_sel = 0
    self.img_labels_dict, self.classes = vec2dic(self.img_labels)
    self.indexlist = range(len(self.skt_labels))
    self.shuffle_keeping_min_classes_per_batch()
    self._cur = 0  # current image
    # this class does some simple data-manipulations
    self.img_augment = SimpleAugment(mean=self.img_mean, shape=params['shape'],
                                     scale=params['scale'], rot=params['rot'])
    if 'verbose' not in params:
        print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
            len(self.skt_labels), len(self.img_labels), self.num_classes)
    #create threadpools for parallel augmentation
    self.pool = ThreadPool() #4
def __init__(self, img_dir='', img_lst='', lmdb=''):
    """Hold any combination of three optional image sources.

    Each source that is supplied (non-empty string) is opened/parsed and its
    corresponding have_* flag is set; omitted sources stay flagged False.
    """
    self.have_img_lst = False
    self.have_img_dir = False
    self.have_lmdb = False
    self.hardsel = None
    if img_lst:
        # project helper parses the comma-separated list file
        self.img_lst = helper().read_list(img_lst, delimeter=',',
                                          keep_original=False)
        self.have_img_lst = True
    if img_dir:
        assert os.path.isdir(img_dir), 'Opps. img_dir is not a dir'
        self.img_dir = img_dir
        self.have_img_dir = True
    if lmdb:
        self.lmdb = lmdbs(lmdb)
        self.have_lmdb = True
def __init__(self, params):
    """Sequential batch loader over a single lmdb.

    The final batch is allowed to be partial (batch count is rounded up).
    params keys read here: 'batch_size', 'shape', 'source', 'mean_file',
    'scale'.
    """
    self.batch_size = params['batch_size']
    self.outshape = params['shape']
    self.lmdb = lmdbs(params['source'])
    self.labels = self.lmdb.get_label_list()
    self.img_mean = biproto2py(params['mean_file']).squeeze()
    self.NIMGS = len(self.labels)
    # ceiling division: the last batch may hold fewer than batch_size samples
    self.num_batches = int(np.ceil(float(self.NIMGS) / self.batch_size))
    self._cur = 0  # current batch
    # this class does some simple data-manipulations
    self.img_augment = SimpleAugment(mean=self.img_mean,
                                     shape=params['shape'],
                                     scale=params['scale'])
def __init__(self, params):
    """Batch loader over one lmdb whose size must divide evenly into batches.

    params keys read here: 'batch_size', 'shape', 'source', 'mean_file',
    'scale'.  Raises AssertionError when the sample count is not an exact
    multiple of batch_size (the reshape below requires full batches).
    """
    self.batch_size = params['batch_size']
    self.outshape = params['shape']
    self.lmdb = lmdbs(params['source'])
    self.labels = self.lmdb.get_label_list()
    self.img_mean = biproto2py(params['mean_file']).squeeze()
    self.NIMGS = len(self.labels)
    assert self.NIMGS%self.batch_size==0,'NIMGS {} not dividible by batchsize {}'.format(
        self.NIMGS,self.batch_size)
    # exact integer division is safe here thanks to the assert above
    self.num_batches = self.NIMGS/self.batch_size
    self._cur = 0  # current batch
    # labels arranged one row per batch
    # NOTE(review): assumes get_label_list() returns a numpy array — confirm
    self.labels_tab = self.labels.reshape((self.num_batches,self.batch_size))
    # this class does some simple data-manipulations
    self.img_augment = SimpleAugment(mean=self.img_mean,shape=params['shape'],
        scale = params['scale'])
def lmdb_get_info(DB): """ get information about an lmdb IN: DB can be either a true lmdb or a python pickle includes: number of classes, number of sample per class, number of samples """ if DB.endswith('.pkl'): helps = helper() data = helps.load(DB, 1) #load the first variable which is the labels labels = data['labels'] else: lmdb_ = lmdbs(DB) labels = lmdb_.get_label_list() out = {} out['num_classes'] = len(set(labels)) out['num_samples'] = len(labels) if out['num_samples'] % out['num_classes'] != 0: print 'We got an unbalance lmdb having {} samples of {} classes'.format(\ out['num_samples'],out['num_classes']) out['samples_per_class'] = out['num_samples'] / out['num_classes'] return out
def __init__(self, params):
    """Sketch/image batch loader with optional hard-example selection and
    label normalization onto the contiguous range [0, num_classes-1].

    params keys read here: 'batch_size', 'shape', 'classes_per_batch',
    'img_source', 'skt_source', 'mean_file', 'scale', 'rot'; at most one of
    'hard_pos' / 'hard_neg' / 'hard_pn' (hard selection off when none given);
    optional 'verbose' suppresses the label-conversion notice when present.
    """
    self.batch_size = params['batch_size']
    self.img_shape = params['shape']
    self.classes_per_batch = params['classes_per_batch']
    self.img_lmdb = lmdbs(params['img_source'])
    # sketches may come from an svg pickle or a true lmdb
    if params['skt_source'].endswith('.pkl'):
        self.skt_lmdb = svgs(params['skt_source'])
    else:
        self.skt_lmdb = lmdbs(params['skt_source'])
    self.img_labels = self.img_lmdb.get_label_list()
    self.skt_labels = self.skt_lmdb.get_label_list()
    label_ids = list(set(self.img_labels))
    NCATS = len(label_ids)
    # remap arbitrary label values onto 0..NCATS-1 when necessary
    # NOTE(review): the first/last check relies on list(set(...)) yielding
    # sorted values — true for small CPython ints in practice, but confirm
    if label_ids[0] != 0 or label_ids[-1] != NCATS - 1:
        if 'verbose' not in params:
            print 'Your data labels are not [0:{}]. Converting label ...'.format(
                NCATS - 1)
        self.img_labels = [
            label_ids.index(label) for label in self.img_labels
        ]
        self.skt_labels = [
            label_ids.index(label) for label in self.skt_labels
        ]
    self.img_mean = biproto2py(params['mean_file']).squeeze()
    #self.skt_mean = biproto2py(params['skt_mean']).squeeze()
    self.num_classes = len(set(self.skt_labels))
    assert self.num_classes == NCATS, 'XX!!Sketch & image datasets unequal #categories'
    assert len(self.skt_labels)%self.num_classes==0, \
        'Unequal sketch training samples for each class'
    # Python-2 integer division: samples per class
    self.skt_per_class = len(self.skt_labels) / self.num_classes
    # hard selection mode: 0 = off, 1 = positives, 2 = negatives, 3 = both
    if 'hard_pos' in params:
        self.hard_sel = 1
        self.hard_pos = np.load(params['hard_pos'])['pos']
    elif 'hard_neg' in params:
        self.hard_sel = 2
        self.hard_neg = np.load(params['hard_neg'])['neg']
    elif 'hard_pn' in params:
        self.hard_sel = 3
        tmp = np.load(params['hard_pn'])
        self.hard_pos = tmp['pos']
        self.hard_neg = tmp['neg']
    else: #hard selection turn off
        self.hard_sel = 0
    #self.img_labels_dict, self.classes = vec2dic(self.img_labels)
    self.indexlist = range(len(self.skt_labels))
    self.indexlist_img = range(len(self.img_labels))
    #self.shuffle_keeping_min_classes_per_batch()
    shuffle(self.indexlist)
    shuffle(self.indexlist_img)
    self._cur = 0  # current image
    self._cur_img = 0
    # this class does some simple data-manipulations
    self.img_augment = SimpleAugment(mean=self.img_mean,
                                     shape=self.img_shape, scale=params['scale'],
                                     rot=params['rot'])
    print "BatchLoader initialized with {} sketches, {} images of {} classes".format(
        len(self.skt_labels), len(self.img_labels), self.num_classes)
    #create threadpools for parallel augmentation
    self.pool = ThreadPool() #4
def extract_cnn_feat(net_params, DB, OUT, layer=0, verbose=True): """ extract features from CNN DB: lmdb data you want to extract feature net_params: dictionary with keys "DEPLOY_PRO","data_mean", "WEIGHTS","scale_factor", batch_size OUT: save output in mat file layer: 0 for last layer, -1: one before the last layer, -2: ... """ assert layer <= 0, 'layer should be a non-positive integer' DEPLOY_PRO = net_params['DEPLOY_PRO'] WEIGHTS = net_params['WEIGHTS'] scale_factor = net_params['scale_factor'] data_mean = net_params['data_mean'] batch_size = net_params['batch_size'] net = caffe.Net(DEPLOY_PRO, WEIGHTS, caffe.TEST) if verbose: print 'Extracting cnn feats...' print ' Model def: {}\n Weights: {}'.format(DEPLOY_PRO, WEIGHTS) start_t = time.time() db = lmdbs(DB) labels = db.get_label_list() NIMGS = labels.size img_mean = biproto2py(data_mean) inblob = net.inputs[0] in_dim = net.blobs[inblob].data.shape[1:] prep = SimpleAugment(mean=img_mean, shape=in_dim[-2:]) feat_l = net.blobs.keys()[layer - 1] out_dim = net.blobs[feat_l].data.squeeze().shape[-1] feats = np.zeros((NIMGS, out_dim), dtype=np.float32) for i in xrange(0, NIMGS, batch_size): batch = range(i, min(i + batch_size, NIMGS)) if verbose: print('Processing sample #{} - {}'.format(batch[0], batch[-1])) new_shape = (len(batch), ) + in_dim net.blobs[inblob].reshape(*new_shape) chunk = db.get_data(batch) net.blobs[inblob].data[...] = prep.augment_deploy(chunk) temp = net.forward() feats[batch] = net.blobs[feat_l].data.squeeze() #apply scale factor feats *= scale_factor if OUT.endswith('.mat'): py2mat(dict(feats=feats, labels=labels), OUT) elif OUT.endswith('.npz'): np.savez(OUT, feats=feats, labels=labels) else: #assume it is pickle helps = helper() helps.save(OUT, feats=feats, labels=labels) net = None if verbose: end_t = time.time() print 'Save features to {}.'.format(OUT) print 'Time: {}\n'.format(timedelta(seconds=int(end_t - start_t)))