def predict(self, save_layers=None, filename=None):
    self.net.save_layerouput(save_layers)
    self.print_net_summary()
    util.log('Starting predict...')
    save_output = []
    while self.curr_epoch < 2:
        start = time.time()
        self.test_data = self.test_dp.get_next_batch()
        self.curr_epoch = self.test_data.epoch
        self.curr_batch = self.test_data.batchnum

        # Walk the batch in minibatch-sized slices; TEST mode skips the weight update.
        self.num_test_minibatch = divup(self.test_data.data.shape[1], self.batch_size)
        for i in range(self.num_test_minibatch):
            input, label = self.get_next_minibatch(i, TEST)
            self.net.train_batch(input, label, TEST)
        cost, correct, numCase = self.net.get_batch_information()
        print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
            self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)
        if save_layers is not None:
            save_output.extend(self.net.get_save_output())

    if save_layers is not None and filename is not None:
        # protocol=-1 is the highest (binary) pickle protocol, so open in 'wb'.
        with open(filename, 'wb') as f:
            cPickle.dump(save_output, f, protocol=-1)
        util.log('save layer output finished')
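# A hedged usage sketch, not part of the original module: how predict() might
# be driven to dump layer activations and reload them. The 'trainer' argument
# and the layer names 'pool5' and 'fc10' are hypothetical placeholders;
# substitute the names from your own network configuration. Relies on the
# module's existing cPickle import.
def _example_dump_layer_output(trainer, path='layer-output.pickle'):
    # Run prediction while capturing two layers, then reload the dump.
    trainer.predict(save_layers=['pool5', 'fc10'], filename=path)
    with open(path, 'rb') as f:
        saved = cPickle.load(f)  # list of per-batch layer outputs
    return saved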
def __init__(self, data_dir, batch_range=None, category_range=None, batch_size=128):
    ParallelDataProvider.__init__(self, data_dir, batch_range)

    self.img_size = 256
    self.border_size = 16
    self.inner_size = 224
    self.batch_size = batch_size

    # self.multiview = dp_params['multiview_test'] and test
    self.multiview = 0
    self.num_views = 5 * 2
    self.data_mult = self.num_views if self.multiview else 1
    self.buffer_idx = 0

    # Map each synset directory (e.g. 'n01440764') to its path, keyed by the
    # synset id with the leading 'n' stripped.
    dirs = glob.glob(data_dir + '/n*')
    synid_to_dir = {}
    for d in dirs:
        synid_to_dir[basename(d)[1:]] = d

    if category_range is None:
        cat_dirs = dirs
    else:
        cat_dirs = []
        for i in category_range:
            synid = self.batch_meta['label_to_synid'][i]
            # util.log('Using category: %d, synid: %s, label: %s', i, synid, self.batch_meta['label_names'][i])
            cat_dirs.append(synid_to_dir[synid])

    self.images = []
    batch_dict = dict((k, k) for k in self.batch_range)
    for d in cat_dirs:
        imgs = [v for i, v in enumerate(glob.glob(d + '/*.jpg')) if i in batch_dict]
        self.images.extend(imgs)

    self.images = np.array(self.images)

    # Build an index vector into 'images' and split it into batch-sized groups.
    image_index = np.arange(len(self.images))
    np.random.shuffle(image_index)

    self.batches = np.array_split(image_index, util.divup(len(self.images), batch_size))

    self.batch_range = range(len(self.batches))
    util.log('Starting data provider with %d batches', len(self.batches))
    np.random.shuffle(self.batch_range)

    # Load the precomputed image mean and crop it to the inner region so it
    # lines up with the cropped input images.
    imagemean = cPickle.loads(open(data_dir + '/image-mean.pickle').read())
    self.data_mean = (imagemean['data']
                      .astype(np.single)
                      .T
                      .reshape((3, 256, 256))[:,
                                              self.border_size:self.border_size + self.inner_size,
                                              self.border_size:self.border_size + self.inner_size]
                      .reshape((self.get_data_dims(), 1)))
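# util.divup is used above for ceiling division when sizing the batch list;
# a minimal sketch of what that helper is assumed to compute (the name
# _divup_sketch is illustrative, not part of the original source):
def _divup_sketch(x, base):
    # Number of size-'base' groups needed to cover x items,
    # e.g. _divup_sketch(1000, 128) == 8.
    return (x + base - 1) // base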
def get_test_error(self):
    start = time.time()
    self.test_data = self.test_dp.get_next_batch()

    self.num_test_minibatch = divup(self.test_data.data.shape[1], self.batch_size)
    for i in range(self.num_test_minibatch):
        input, label = self.get_next_minibatch(i, TEST)
        self.net.train_batch(input, label, TEST)
        self._capture_test_data()
    cost, correct, numCase = self.net.get_batch_information()
    self.test_outputs += [({'logprob': [cost, 1 - correct]}, numCase, time.time() - start)]
    print >> sys.stderr, '[%d] error: %f logreg: %f time: %f' % (
        self.test_data.batchnum, 1 - correct, cost, time.time() - start)
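# A hedged helper sketch, not in the original source: each entry appended to
# test_outputs above has the shape ({'logprob': [cost, error]}, numCase,
# elapsed), so a case-weighted test error over all recorded batches can be
# recovered like this:
def _aggregate_test_error(test_outputs):
    total_err = sum(out['logprob'][1] * n for out, n, _ in test_outputs)
    total_case = sum(n for _, n, _ in test_outputs)
    return total_err / total_case if total_case else 0.0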
def train(self):
    self.print_net_summary()
    util.log('Starting training...')
    while self.should_continue_training():
        self.train_data = self.train_dp.get_next_batch()
        # self.train_dp.wait()
        self.curr_epoch = self.train_data.epoch
        self.curr_batch = self.train_data.batchnum

        start = time.time()
        self.num_train_minibatch = divup(self.train_data.data.shape[1], self.batch_size)
        t = 0
        for i in range(self.num_train_minibatch):
            input, label = self.get_next_minibatch(i)
            stime = time.time()
            self.net.train_batch(input, label)
            self._capture_training_data()
            t += time.time() - stime
            self.curr_minibatch += 1

        cost, correct, numCase = self.net.get_batch_information()
        self.train_outputs += [({'logprob': [cost, 1 - correct]}, numCase, time.time() - start)]
        print >> sys.stderr, '%d.%d: error: %f logreg: %f time: %f' % (
            self.curr_epoch, self.curr_batch, 1 - correct, cost, time.time() - start)

        self.num_batch += 1
        if self.check_test_data():
            print >> sys.stderr, '---- test ----'
            self.get_test_error()
            print >> sys.stderr, '------------'

        if self.factor != 1.0 and self.check_adjust_lr():
            print >> sys.stderr, '---- adjust learning rate ----'
            self.net.adjust_learning_rate(self.factor)
            print >> sys.stderr, '--------'

        if self.check_save_checkpoint():
            print >> sys.stderr, '---- save checkpoint ----'
            self.save_checkpoint()
            print >> sys.stderr, '------------'

    # Final evaluation and checkpoint once training stops.
    self.get_test_error()
    self.save_checkpoint()
    self.report()
    self._finished_training()
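# Illustrative sketch, under the assumption that adjust_learning_rate(factor)
# scales the rate multiplicatively: with factor = 0.1 every adjustment drops
# the rate by one decade. The function name and numbers are hypothetical.
def _example_lr_schedule(base_lr=0.01, factor=0.1, adjustments=3):
    rates = [base_lr]
    for _ in range(adjustments):
        rates.append(rates[-1] * factor)  # mirrors net.adjust_learning_rate(factor)
    return rates  # e.g. [0.01, 0.001, 0.0001, 0.00001]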