Example No. 1
 def process_dataset(self, dataset, as_list=False, as_2d=False):
     """Processes a whole dataset and returns an numpy ndarray
     
     Input:
         dataset: the input dataset.
         as_list: if True, return a list. This applies when the output has
             different sizes for each image. Default False.
         as_2d: if True, return a matrix where each image corresponds to a
             row in the matrix. Default False.
     """
     # check if we want to use buffer
     if self._fixed_size:
         convbuffer = [None] * (len(self) + 1)
     else:
         convbuffer = None
     total = dataset.size_total()
     logging.debug("Processing a total of %s images" % (total, ))
     timer = util.Timer()
     if as_list:
         data = [self.process(dataset.image(i), convbuffer = convbuffer) \
                 for i in range(dataset.size())]
     else:
         # we assume that each image leads to the same feature size
         temp = self.process(dataset.image(0), as_vector=as_2d)
         logging.debug("Output feature shape: %s" % (str(temp.shape)))
         data = np.empty((dataset.size(), ) + temp.shape)
         data[0] = temp
         size = dataset.size()
         timer = util.Timer()
         for i in range(1, size):
             data[i] = self.process(dataset.image(i),
                                    as_vector=as_2d,
                                    convbuffer=convbuffer)
             # report local progress
             if (i * 10 // size) != ((i - 1) * 10 // size):
                 logging.debug("rank %d: %d percent. elapsed %s" % \
                         (mpi.RANK, i * 100 // size, timer.total()))
     mpi.barrier()
     logging.debug("Feature extration took %s" % timer.total())
     return data
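The non-list branch above preallocates the output array from the first image's feature shape and then fills the remaining rows in place. A minimal, self-contained sketch of that pattern; the `fake_feature` function below is a made-up stand-in for `self.process(dataset.image(i))`, purely for illustration:

import numpy as np

def fake_feature(i, dim=8):
    # stand-in for self.process(dataset.image(i)): a fixed-size feature vector
    return np.full(dim, float(i))

num_images = 5
first = fake_feature(0)
# allocate the output once from the first feature's shape, then fill row by row
data = np.empty((num_images,) + first.shape)
data[0] = first
for i in range(1, num_images):
    data[i] = fake_feature(i)
print(data.shape)  # (5, 8)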
Example No. 2
def omp_n(X, k, num_active, max_iter=100, tol=1e-4):
    '''OMP training with MPI
    
    Input:
        X: a num_data_local * dim numpy matrix containing the data, each row
            being a datum.
        k: the dictionary size.
        num_active: the number of active dictionary entries for each datum.
        max_iter: (optional) the maximum number of iterations. Default 100.
        tol: (optional) the tolerance threshold to determine convergence.
            Default 1e-4.
    '''
    # vdata is used for testing convergence
    Nlocal = X.shape[0]
    vdatalocal = np.sum(np.var(X, 0))
    N = mpi.COMM.allreduce(Nlocal)
    vdata = mpi.COMM.allreduce(vdatalocal)
    vdata /= N
    # random initialization
    centroids = np.random.randn(k, X.shape[1])
    centroids /= np.sqrt(np.sum(centroids**2, axis=1)).reshape(k, 1)
    centroids_all = mpi.COMM.gather(centroids)
    # make sure we are using the same centroids on all nodes
    if mpi.is_root():
        centroids_all = np.vstack(centroids_all)
        centroids[:] = centroids_all[\
                np.random.permutation(centroids_all.shape[0])[:k]]
    mpi.COMM.Bcast(centroids, root=0)

    timer = util.Timer()
    for iter_id in range(max_iter):
        logging.debug("OMP-%d iter %d, last iteration %s, elapsed %s" % \
                      (num_active, iter_id, timer.lap(), timer.total()))
        centroids_old = centroids.copy()
        labels, val = omp_n_predict(X, centroids, num_active)
        centroids = omp_n_maximize(X, labels, val, k)
        # check convergence on root
        if mpi.is_root():
            converged = np.sum((centroids_old - centroids)**2) < tol * vdata
        else:
            converged = None
        converged = mpi.COMM.bcast(converged)
        if converged:
            logging.debug("OMP has converged.")
            break
    else:
        logging.debug("OMP reached the maximum number of iterations.")
    return centroids
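The per-node pieces of this routine are easy to try in isolation: the dictionary is initialized with row-normalized Gaussian atoms, and convergence is declared once the squared change of the dictionary drops below `tol` times the data variance statistic `vdata`. A single-node sketch of those two steps in plain numpy (the small perturbation below merely simulates one dictionary update; it is not `omp_n_maximize`):

import numpy as np

k, dim, tol = 16, 32, 1e-4
X = np.random.randn(500, dim)

# random initialization: Gaussian atoms, each row scaled to unit L2 norm
centroids = np.random.randn(k, dim)
centroids /= np.sqrt(np.sum(centroids**2, axis=1)).reshape(k, 1)

# the convergence threshold used above, collapsed to one node:
# summed per-dimension variance divided by the number of data points
vdata = np.sum(np.var(X, 0)) / X.shape[0]

# simulate one small dictionary update and test the stopping rule
centroids_new = centroids + 1e-5 * np.random.randn(k, dim)
change = np.sum((centroids_new - centroids)**2)
print(change < tol * vdata)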
Example No. 3
 def solve(self, sampler, param_init = None, K = None,
          resume = None, new_lr = None):
     """The solve function.
     Input:
         sampler: the data sampler. sampler.sample() should return a list
             of training data, either (X, Y, weight) or (X, Y, None)
             depending on whether weight is enforced.
         param_init: the initial parameter. See SolverMC for details.
         K: (optional) passed through to the underlying SolverMC calls.
         resume: (optional) path to an npz checkpoint (as written by the
             dump_every option) from which training is resumed.
         new_lr: (optional) if given, overrides the base learning rate stored
             in the checkpoint.
     """
     mode = self._args.get('mode', 'lbfgs').lower()
     # even when we use Adagrad we create a solver_basic to deal with
     # function value and gradient computation, etc.
     solver_basic = SolverMC(self._gamma, self.loss, self.reg,
             self._args, self._lossargs, self._regargs,
             self._fminargs)
     param = param_init
     iter_start = 0
     if resume is not None:
          # load data from the resume checkpoint
         logging.debug('Resuming from %s' % resume)
         npzdata = np.load(resume)
         param = (npzdata['w'], npzdata['b'])
         iter_start = npzdata['iter'] + 1
         if 'accum_grad' in npzdata:
             accum_grad = npzdata['accum_grad']
         if 'base_lr' in npzdata:
             self._args['base_lr'] = npzdata['base_lr']
         if new_lr is not None:
             self._args['base_lr'] = new_lr
     timer = util.Timer()
     for iter in range(iter_start, self._args['num_iter']):
         Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
         # carry out the computation
         if mode == 'lbfgs':
             accum_grad = None
             param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param, K = K)
             logging.debug('iter %d time = %s' % \
                     (iter, str(timer.total(False))))
         else:
             # adagrad: compute gradient and update
             if iter == iter_start:
                 logging.debug("Adagrad: Initializing")
                 param_flat = solver_basic.presolve(\
                         Xbatch, Ybatch, weightbatch, param, K = K)
                 # we need to build the cache in solver_basic as well as
                 # the accumulated gradients
                 if iter == 0:
                     accum_grad = np.ones_like(param_flat) * \
                             (self._args.get('eta', 0.) ** 2) + \
                             np.finfo(np.float64).eps
                 if 'base_lr' not in self._args or self._args['base_lr'] < 0:
                     logging.debug("Adagrad: Performing line search")
                     # do a line search to get the value
                     self._args['base_lr'] = \
                             mathutil.wolfe_line_search_adagrad(param_flat,
                             lambda x: SolverMC.obj(x, solver_basic),
                             alpha = np.abs(self._args.get('base_lr', 1.)),
                             eta = self._args.get('eta', 0.))
                     # reset the timer to exclude the base learning rate tuning
                     # time
                     timer.reset()
             else:
                 solver_basic._X = Xbatch
                 solver_basic._Y = Ybatch
                 solver_basic._weight = weightbatch
             logging.debug("Adagrad: Computing func and grad")
             f0, g = SolverMC.obj(param_flat, solver_basic)
             logging.debug('gradient max/min: %f/%f' % (g.max(), g.min()))
             accum_grad += g * g
             # we are MINIMIZING, so go against the gradient direction
             param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
             # the block below recomputes the objective after the update for
             # debugging; it is disabled (guarded by `if False`) for speed.
             if False:
                 f = SolverMC.obj(param_flat, solver_basic)[0] 
                 logging.debug('iter %d f0 = %f f = %f time = %s' % \
                         (iter, f0, f,\
                         str(timer.total(False))))
             else:
                 logging.debug('iter %d f0 = %f time = %s' % \
                         (iter, f0, str(timer.total(False))))
             param = solver_basic.unflatten_params(param_flat)
         callback = self._args.get('callback', None)
         if callback is None:
             pass
         elif type(callback) is not list:
             cb_val = callback(param)
             logging.debug('cb: ' + str(cb_val))
         else:
             cb_val = [cb_func(param) for cb_func in callback]
             logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
         if 'dump_every' in self._args and \
                 (iter + 1) % self._args['dump_every'] == 0:
             logging.debug('dumping param...')
             mpi.root_savez(self._args['dump_name'],\
                     iter=iter, w = param[0], b = param[1], \
                     accum_grad = accum_grad, base_lr = self._args['base_lr'])
     return param
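The update in the AdaGrad branch is just three lines once the solver plumbing is stripped away: accumulate squared gradients, then step against the gradient, scaled per coordinate by the inverse square root of the accumulator. A self-contained sketch on a toy quadratic (the objective and learning rate here are illustrative, not the solver's own):

import numpy as np

def obj(w):
    # toy quadratic: f(w) = 0.5 * ||w||^2, gradient = w
    return 0.5 * np.dot(w, w), w.copy()

base_lr = 0.5
eta = 0.0
w = np.random.randn(10)
# accumulator initialized as in the code above: eta**2 plus machine epsilon
accum_grad = np.ones_like(w) * eta**2 + np.finfo(np.float64).eps
for it in range(100):
    f, g = obj(w)
    accum_grad += g * g
    # minimizing, so step against the gradient, scaled per coordinate
    w -= g / np.sqrt(accum_grad) * base_lr
print(obj(w)[0])  # the objective decreases toward 0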
Example No. 4
 def solve(self, sampler, param_init = None):
     """The solve function.
     Input:
         sampler: the data sampler. sampler.sample() should return a list
             of training data, either (X, Y, weight) or (X, Y, None)
             depending on whether weight is enforced.
         param_init: the initial parameter. See SolverMC for details.
     """
     mode = self._args.get('mode', 'lbfgs').lower()
     # even when we use Adagrad we create a solver_basic to deal with
     # function value and gradient computation, etc.
     solver_basic = SolverMC(self._gamma, self.loss, self.reg,
             self._args, self._lossargs, self._regargs,
             self._fminargs)
     param = param_init
     timer = util.Timer()
     for iter in range(self._args['num_iter']):
         Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
         # carry out the computation
         if mode == 'lbfgs':
             param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param)
             logging.debug('iter %d time = %s' % \
                     (iter, str(timer.total(False))))
         else:
             # adagrad: compute gradient and update
             param_flat = solver_basic.presolve(\
                     Xbatch, Ybatch, weightbatch, param)
             if iter == 0:
                 # we need to build the cache in solver_basic as well as
                 # the accumulated gradients
                 accum_grad = np.ones_like(param_flat) * \
                         (self._args.get('eta', 0.) ** 2) + \
                         np.finfo(np.float64).eps
                 if self._args.get('base_lr', None) is None:
                     # do a line search to get the value
                     self._args['base_lr'] = \
                             mathutil.wolfe_line_search_adagrad(param_flat,
                             lambda x: SolverMC.obj(x, solver_basic),
                             eta = self._args.get('eta', 0.))
                     # reset the timer to exclude the base learning rate tuning
                     # time
                     timer.reset()
             f0, g = SolverMC.obj(param_flat, solver_basic)
             accum_grad += g * g
             # we are MINIMIZING, so go against the gradient direction
             param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
             f = SolverMC.obj(param_flat, solver_basic)[0] 
             logging.debug('iter %d f0 = %f f = %f time = %s' % \
                     (iter, f0, f,\
                     str(timer.total(False))))
             param = solver_basic.unflatten_params(param_flat)
         callback = self._args.get('callback', None)
         if callback is None:
             continue
         if type(callback) is not list:
             cb_val = callback(param)
             logging.debug('cb: ' + str(cb_val))
         else:
             cb_val = [cb_func(param) for cb_func in callback]
             logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
     # the stochastic part is done. See if we want to do fine-tuning.
     finetune = self._args.get('fine_tune', 0)
     if finetune > 0:
         solver_basic._fminargs['maxfun'] = int(finetune)
          # fine-tune on the most recently sampled minibatch
          param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param)
     return param
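Both `solve` variants only rely on the sampler exposing `sample(minibatch)` and returning an `(X, Y, weight)` triple, with `weight` possibly `None`. A hypothetical sampler sketching that contract (the class name and the random-index strategy are made up for illustration; construction of the solver itself is omitted):

import numpy as np

class RandomSampler:
    """Hypothetical sampler: draws random minibatches from in-memory arrays."""
    def __init__(self, X, Y):
        self._X, self._Y = X, Y

    def sample(self, minibatch):
        idx = np.random.randint(self._X.shape[0], size=minibatch)
        # weight is None, i.e. the (X, Y, None) case described in the docstring
        return self._X[idx], self._Y[idx], None

# usage sketch:
# sampler = RandomSampler(Xtrain, Ytrain)
# param = solver.solve(sampler)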