def process_dataset(self, dataset, as_list=False, as_2d=False):
    """Processes a whole dataset and returns a numpy ndarray

    Input:
        dataset: the input dataset.
        as_list: if True, return a list. This applies when the output has
            different sizes for each image. Default False.
        as_2d: if True, return a matrix where each image corresponds to a
            row in the matrix. Default False.
    """
    # check if we want to use buffer
    if self._fixed_size:
        convbuffer = [None] * (len(self) + 1)
    else:
        convbuffer = None
    total = dataset.size_total()
    logging.debug("Processing a total of %s images" % (total,))
    timer = util.Timer()
    if as_list:
        data = [self.process(dataset.image(i), convbuffer=convbuffer)
                for i in range(dataset.size())]
    else:
        # we assume that each image leads to the same feature size
        temp = self.process(dataset.image(0), as_vector=as_2d)
        logging.debug("Output feature shape: %s" % (str(temp.shape)))
        data = np.empty((dataset.size(),) + temp.shape)
        data[0] = temp
        size = dataset.size()
        # restart the timer after probing the feature shape on the first image
        timer = util.Timer()
        for i in range(1, size):
            data[i] = self.process(dataset.image(i), as_vector=as_2d,
                                   convbuffer=convbuffer)
            # report local progress
            if (i * 10 / size) != ((i - 1) * 10 / size):
                logging.debug("rank %d: %d percent. elapsed %s" %
                              (mpi.RANK, i * 100 / size, timer.total()))
    mpi.barrier()
    logging.debug("Feature extraction took %s" % timer.total())
    return data
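# --- Usage sketch (not part of the library) ---
# process_dataset() only relies on three methods of the dataset object:
# size() (number of images on the local node), size_total() (global count,
# used for logging), and image(i). The minimal in-memory dataset below is a
# hypothetical illustration of that interface, handy for testing; the class
# name and constructor are assumptions, not library API.
import numpy as np

class SimpleNdarrayDataset(object):
    """Hypothetical minimal dataset holding a list of images in memory."""
    def __init__(self, images):
        self._images = list(images)

    def size(self):
        # number of images stored on this node
        return len(self._images)

    def size_total(self):
        # total image count across all nodes; identical to size() in this
        # single-node sketch
        return len(self._images)

    def image(self, i):
        return self._images[i]

# extractor below stands for whatever object defines process_dataset():
# dataset = SimpleNdarrayDataset([np.random.rand(32, 32, 3) for _ in range(10)])
# features = extractor.process_dataset(dataset, as_2d=True)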
def omp_n(X, k, num_active, max_iter=100, tol=1e-4):
    '''OMP training with MPI

    Input:
        X: a num_data_local * dim numpy matrix containing the data, each row
            being a datum.
        k: the dictionary size.
        num_active: the number of active dictionary entries for each datum
        max_iter: (optional) the maximum number of iterations. Default 100.
        tol: (optional) the tolerance threshold to determine convergence.
            Default 1e-4.
    '''
    # vdata is used for testing convergence
    Nlocal = X.shape[0]
    vdatalocal = np.sum(np.var(X, 0))
    N = mpi.COMM.allreduce(Nlocal)
    vdata = mpi.COMM.allreduce(vdatalocal)
    vdata /= N
    # random initialization
    centroids = np.random.randn(k, X.shape[1])
    centroids /= np.sqrt(np.sum(centroids ** 2, axis=1)).reshape(k, 1)
    centroids_all = mpi.COMM.gather(centroids)
    # make sure we are using the same centroids on all nodes
    if mpi.is_root():
        centroids_all = np.vstack(centroids_all)
        centroids[:] = centroids_all[
            np.random.permutation(centroids_all.shape[0])[:k]]
    mpi.COMM.Bcast(centroids, root=0)
    timer = util.Timer()
    for iter_id in range(max_iter):
        logging.debug("OMP-%d iter %d, last iteration %s, elapsed %s" %
                      (num_active, iter_id, timer.lap(), timer.total()))
        centroids_old = centroids.copy()
        labels, val = omp_n_predict(X, centroids, num_active)
        centroids = omp_n_maximize(X, labels, val, k)
        # check convergence on root
        if mpi.is_root():
            converged = np.sum((centroids_old - centroids) ** 2) < tol * vdata
        else:
            converged = None
        converged = mpi.COMM.bcast(converged)
        if converged:
            logging.debug("OMP has converged.")
            break
    else:
        logging.debug("OMP reached the maximum number of iterations.")
    return centroids
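# --- Usage sketch (not part of the library) ---
# A minimal way to call omp_n(), assuming a single-process run in which the
# mpi wrappers reduce to local no-ops; the data below is random and purely
# illustrative.
import numpy as np

# one datum per row, e.g. 10000 whitened 6x6x3 image patches flattened to 108
patches = np.random.randn(10000, 108)

# learn a dictionary of 100 entries, activating 4 entries per datum;
# omp_n returns the k x dim matrix of dictionary entries (centroids)
dictionary = omp_n(patches, k=100, num_active=4, max_iter=50, tol=1e-4)

# encode the same data against the learned dictionary
labels, values = omp_n_predict(patches, dictionary, num_active=4)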
def solve(self, sampler, param_init=None, K=None, resume=None, new_lr=None):
    """The solve function.

    Input:
        sampler: the data sampler. sampler.sample() should return a list of
            training data, either (X, Y, weight) or (X, Y, None) depending on
            whether weight is enforced.
        param_init: the initial parameter. See SolverMC for details.
    """
    mode = self._args.get('mode', 'lbfgs').lower()
    # even when we use Adagrad we create a solver_basic to deal with
    # function value and gradient computation, etc.
    solver_basic = SolverMC(self._gamma, self.loss, self.reg, self._args,
                            self._lossargs, self._regargs, self._fminargs)
    param = param_init
    iter_start = 0
    if resume is not None:
        # load the parameters and solver state from the resume file
        logging.debug('Resuming from %s' % resume)
        npzdata = np.load(resume)
        param = (npzdata['w'], npzdata['b'])
        iter_start = npzdata['iter'] + 1
        if 'accum_grad' in npzdata:
            accum_grad = npzdata['accum_grad']
        if 'base_lr' in npzdata:
            self._args['base_lr'] = npzdata['base_lr']
    if new_lr is not None:
        self._args['base_lr'] = new_lr
    timer = util.Timer()
    for iter in range(iter_start, self._args['num_iter']):
        Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
        # carry out the computation
        if mode == 'lbfgs':
            accum_grad = None
            param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param,
                                       K=K)
            logging.debug('iter %d time = %s' %
                          (iter, str(timer.total(False))))
        else:
            # adagrad: compute gradient and update
            if iter == iter_start:
                logging.debug("Adagrad: Initializing")
                param_flat = solver_basic.presolve(
                    Xbatch, Ybatch, weightbatch, param, K=K)
                # we need to build the cache in solver_basic as well as
                # the accumulated gradients
                if iter == 0:
                    accum_grad = np.ones_like(param_flat) * \
                        (self._args.get('eta', 0.) ** 2) + \
                        np.finfo(np.float64).eps
                if 'base_lr' not in self._args or self._args['base_lr'] < 0:
                    logging.debug("Adagrad: Performing line search")
                    # do a line search to get the value
                    self._args['base_lr'] = \
                        mathutil.wolfe_line_search_adagrad(param_flat,
                            lambda x: SolverMC.obj(x, solver_basic),
                            alpha=np.abs(self._args.get('base_lr', 1.)),
                            eta=self._args.get('eta', 0.))
                    # reset the timer to exclude the base learning rate
                    # tuning time
                    timer.reset()
            else:
                solver_basic._X = Xbatch
                solver_basic._Y = Ybatch
                solver_basic._weight = weightbatch
            logging.debug("Adagrad: Computing func and grad")
            f0, g = SolverMC.obj(param_flat, solver_basic)
            logging.debug('gradient max/min: %f/%f' % (g.max(), g.min()))
            accum_grad += g * g
            # we are MINIMIZING, so go against the gradient direction
            param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
            # the code below could be used to debug, but is commented out
            # currently for speed considerations.
            if False:
                f = SolverMC.obj(param_flat, solver_basic)[0]
                logging.debug('iter %d f0 = %f f = %f time = %s' %
                              (iter, f0, f, str(timer.total(False))))
            else:
                logging.debug('iter %d f0 = %f time = %s' %
                              (iter, f0, str(timer.total(False))))
            param = solver_basic.unflatten_params(param_flat)
        callback = self._args.get('callback', None)
        if callback is None:
            pass
        elif type(callback) is not list:
            cb_val = callback(param)
            logging.debug('cb: ' + str(cb_val))
        else:
            cb_val = [cb_func(param) for cb_func in callback]
            logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
        if 'dump_every' in self._args and \
                (iter + 1) % self._args['dump_every'] == 0:
            logging.debug('dumping param...')
            mpi.root_savez(self._args['dump_name'],
                           iter=iter, w=param[0], b=param[1],
                           accum_grad=accum_grad,
                           base_lr=self._args['base_lr'])
    return param
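# --- Illustration (not part of the library) ---
# The adagrad branch above reduces to a per-coordinate scaled gradient step.
# The helper below restates just that update in isolation; it is a sketch of
# the rule, not a drop-in replacement for the solver code.
import numpy as np

def adagrad_step(param_flat, grad, accum_grad, base_lr):
    """One AdaGrad update: accumulate squared gradients, then step against
    the gradient with per-coordinate step size base_lr / sqrt(accum_grad)."""
    accum_grad += grad * grad
    param_flat -= base_lr * grad / np.sqrt(accum_grad)
    return param_flat, accum_grad

# In solve(), accum_grad starts at eta**2 plus machine epsilon:
#   accum_grad = np.ones_like(param_flat) * eta**2 + np.finfo(np.float64).eps
# so eta damps the effective learning rate during the first iterations.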
def solve(self, sampler, param_init=None):
    """The solve function.

    Input:
        sampler: the data sampler. sampler.sample() should return a list of
            training data, either (X, Y, weight) or (X, Y, None) depending on
            whether weight is enforced.
        param_init: the initial parameter. See SolverMC for details.
    """
    mode = self._args.get('mode', 'lbfgs').lower()
    # even when we use Adagrad we create a solver_basic to deal with
    # function value and gradient computation, etc.
    solver_basic = SolverMC(self._gamma, self.loss, self.reg, self._args,
                            self._lossargs, self._regargs, self._fminargs)
    param = param_init
    timer = util.Timer()
    for iter in range(self._args['num_iter']):
        Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
        # carry out the computation
        if mode == 'lbfgs':
            param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param)
            logging.debug('iter %d time = %s' %
                          (iter, str(timer.total(False))))
        else:
            # adagrad: compute gradient and update
            param_flat = solver_basic.presolve(
                Xbatch, Ybatch, weightbatch, param)
            if iter == 0:
                # we need to build the cache in solver_basic as well as
                # the accumulated gradients
                accum_grad = np.ones_like(param_flat) * \
                    (self._args.get('eta', 0.) ** 2) + \
                    np.finfo(np.float64).eps
                if self._args.get('base_lr', None) is None:
                    # do a line search to get the value
                    self._args['base_lr'] = \
                        mathutil.wolfe_line_search_adagrad(param_flat,
                            lambda x: SolverMC.obj(x, solver_basic),
                            eta=self._args.get('eta', 0.))
                    # reset the timer to exclude the base learning rate
                    # tuning time
                    timer.reset()
            f0, g = SolverMC.obj(param_flat, solver_basic)
            accum_grad += g * g
            # we are MINIMIZING, so go against the gradient direction
            param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
            f = SolverMC.obj(param_flat, solver_basic)[0]
            logging.debug('iter %d f0 = %f f = %f time = %s' %
                          (iter, f0, f, str(timer.total(False))))
            param = solver_basic.unflatten_params(param_flat)
        callback = self._args.get('callback', None)
        if callback is None:
            continue
        if type(callback) is not list:
            cb_val = callback(param)
            logging.debug('cb: ' + str(cb_val))
        else:
            cb_val = [cb_func(param) for cb_func in callback]
            logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
    # the stochastic part is done. See if we want to do fine-tuning.
    finetune = self._args.get('fine_tune', 0)
    if finetune > 0:
        solver_basic._fminargs['maxfun'] = int(finetune)
        # fine-tune on the last sampled minibatch
        param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param)
    return param
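# --- Usage sketch (not part of the library) ---
# Both solve() variants only require that the sampler expose
# sample(minibatch_size) returning (X, Y, weight) or (X, Y, None), as stated
# in the docstring. The class below is a hypothetical minimal sampler that
# draws uniform random minibatches from in-memory arrays.
import numpy as np

class RandomMinibatchSampler(object):
    """Hypothetical sampler over in-memory data; returns weight=None since
    no per-datum weight is enforced."""
    def __init__(self, X, Y):
        self._X = X
        self._Y = Y

    def sample(self, minibatch):
        idx = np.random.randint(self._X.shape[0], size=minibatch)
        return self._X[idx], self._Y[idx], None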