Example #1
    def __onep__(self):
        """ called on new epoch. """
        # history records
        h = self.__hist__

        # update the learning rate and the infimum of the training error
        if h[-1]['terr'] < self.__einf__:  # accelerate
            self.lrt.set_value(self.lrt.get_value() * self.lrt_inc.get_value())
            paint(self.nnt, self.__nnt0__)  # back up the improved weights
            self.__einf__ = h[-1]['terr']
        else:  # slow down
            self.lrt.set_value(self.lrt.get_value() * self.lrt_dec.get_value())
            paint(self.__nnt0__, self.nnt)  # roll back to the backed-up weights
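
        # In effect this is a "bold driver" style schedule: when the epoch's
        # training error reaches a new low, the rate grows by lrt_inc and the
        # improved weights are backed up into __nnt0__; otherwise the rate
        # shrinks by lrt_dec and the backed-up weights are restored.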
Example #2
    def __rest__(self, key=None):
        """ restore a snap shot. """
        key = -1 if key is None else key
        ret, skp = self.__snap__[key], Snap.__skip__
        for k, v in vars(self).items():
            if k in skp or callable(v) or k not in ret:
                continue
            if isinstance(v, TSV):
                v.set_value(ret[k])
            else:
                # plain (non-shared) attributes are restored via setattr
                setattr(self, k, ret[k])
        paint(ret['nnt'], self.nnt)

        # remove history after the snapshot
        if '__hist__' in vars(self):
            ep = ret['ep'].item()
            del self.__hist__[ep + 1:]
        return ret
Example #3
    def __shot__(self, key=None):
        """ take a snapshot. """
        key = -1 if key is None else key
        if '__hist__' in vars(self) and len(self.__hist__) > 0:
            ret = deepcopy(self.__hist__[-1])
        else:
            ret = dict()
        skp = Snap.__skip__
        for k, v in vars(self).items():
            if k in skp or callable(v) or k.startswith('__'):
                continue
            if isinstance(v, TSV):
                ret[k] = v.get_value()
            else:
                ret[k] = deepcopy(v)
        # ret.update(self.__hist__[-1])
        ret['nnt'] = paint(self.nnt)
        self.__snap__[key] = ret
        return ret
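
    # (usage sketch, with `tnr` standing for a trainer instance):
    #     snap = tnr.__shot__()   # save the current state under key -1
    #     ...                     # continue training
    #     tnr.__rest__()          # roll back to the saved state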
Example #4
    def __init__(self, nnt, x=None, z=None, u=None, v=None, **kwd):
        """
        Constructor.
        : -------- parameters -------- :
        nnt: an expression builder for the neural network to be trained;
        it can be an Nnt object.

        x: the inputs, with the first dimension standing for sample units.
        If unspecified, the trainer will try to evaluate the entry point and
        cache the result as source data.
        
        z: the labels, with the first dimension standing for sample units.
        If unspecified, semi-unsupervised training is assumed, as the labels
        will be identical to the inputs.

        u: the validation data inputs
        v: the validation data labels

        : -------- kwd: keywords -------- :
        -- bsz: size of a training batch
        -- lrt: basic learning rate
        -- lmd: weight decay factor, the lambda

        -- err: expression builder for the computation of training error
        between the network output {y} and the label {z}. The expression
        must evaluate to a scalar.

        -- reg: expression builder for the penalty computed on the vector
        of parameters {w}. The expression must evaluate to a scalar.

        -- mmt: momentum of the trainer

        -- vdr: validation disruption rate
        """
        # numpy random number generator
        seed = kwd.pop('seed', None)
        nrng = kwd.pop('nrng', np.random.RandomState(seed))

        from theano.tensor.shared_randomstreams import RandomStreams
        trng = kwd.pop('trng', RandomStreams(nrng.randint(0x7FFFFFFF)))

        # private members
        self.__seed__ = seed
        self.__nrng__ = nrng
        self.__trng__ = trng

        # expression of error and regulator terms
        err = getattr(exb, kwd.get('err', 'CE'))
        reg = getattr(exb, kwd.get('reg', 'L1'))

        # the validation disruption
        self.vdr = S(kwd.get('vdr'), 'VDR')

        # current epoch index, use int64
        self.ep = S(0, 'EP')

        # training batch size, use int64
        bsz = kwd.get('bsz', 20)
        self.bsz = S(bsz, 'BSZ')

        # current batch index, use int64
        self.bt = S(0, 'BT')

        # momentum; make sure it is a sane value
        mmt = kwd.get('mmt', .0)
        self.mmt = S(mmt, 'MMT')

        # learning rate
        lrt = kwd.get('lrt', .01)
        lrt_inc = kwd.get('inc', 1.04)
        lrt_dec = kwd.get('dec', 0.85)
        self.lrt = S(lrt, 'LRT')
        self.lrt_inc = S(lrt_inc, 'LRT_INC')
        self.lrt_dec = S(lrt_dec, 'LRT_DEC')

        # the ratio of weight decay, lambda
        lmd = kwd.get('lmd', .0)
        self.lmd = S(lmd, 'LMD')

        # the neural network
        self.nnt = nnt
        self.dim = (nnt.dim[0], nnt.dim[-1])
        # supremum of gradient
        self.gsup = S(.0)

        # inputs and labels, for modeling and validation
        x = S(np.zeros((bsz * 2, self.dim[0]), 'f') if x is None else x)
        z = x if z is None else S(z)
        u = x if u is None else S(u)
        v = u if v is None else S(v)
        self.x, self.z, self.u, self.v = x, z, u, v

        # * -------- construct trainer functions -------- *
        # 1) symbolic expressions
        x = T.tensor(name='x', dtype=x.dtype, broadcastable=x.broadcastable)
        z = T.tensor(name='z', dtype=z.dtype, broadcastable=z.broadcastable)
        y = nnt(x)  # expression of predicted output

        # list of symbolic parameters to be tuned
        pars = parms(y)  # parameters

        # list of symbolic weights to apply decay to
        vwgt = T.concatenate([p.flatten() for p in pars if p.name == 'w'])

        # symbolic batch cost, which is the mean training error over all
        # observations and sub-attributes.

        # The observations are indexed by the first dimension of y and z, while
        # the sub-attributes are indexed by the remaining dimensions except the
        # last one.

        # The last dimension indexes the data points within each observation;
        # examples are voxels in an MRI volume, or SNPs in a genome segment.
        # Values of these data points are aggregated by the objective function,
        # err, which can be an L1 norm, L2 norm, or CE. The objective function
        # returns a scalar rating of the training loss.
        erro = err(y, z).mean()

        # the sum of weights calculated for weight decay.
        wsum = reg(vwgt)
        cost = erro + wsum * self.lmd
        l2er = exb.L2(y, z).mean()

        # symbolic gradient of cost WRT parameters
        grad = T.grad(cost, pars)
        gabs = T.concatenate([T.abs_(g.flatten()) for g in grad])
        gsup = T.max(gabs)

        # trainer control
        nwep = ((self.bt + 1) * self.bsz) // self.x.shape[-2]  # new epoch?
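        # nwep is 1 when the batch just taken brings the consumed sample count
        # (bt + 1) * bsz up to the data size, and 0 otherwise (assuming bsz
        # does not exceed the number of samples).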

        # 2) define updates after each batch training
        up = []

        # update parameters using gradient descent with momentum
        for p, g in zip(pars, grad):
            # initialize accumulated gradient
            # NOTE: p.eval() causes mayhem!!
            h = S(np.zeros_like(p.get_value()))

            # accumulate the gradient, partly historical (due to the momentum),
            # partly novel
            up.append((h, self.mmt * h + (1 - self.mmt) * g))

            # update parameters by stepping down the accumulated gradient
            up.append((p, p - self.lrt * h))
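
        # Note: Theano evaluates every update expression from the shared
        # variables' pre-update values, so one call to the step function does
        #   p <- p - lrt * h_old
        #   h <- mmt * h_old + (1 - mmt) * dC/dp
        # i.e. the parameters follow an exponential moving average of the
        # gradient.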

        # update batch and epoch indices
        up.append((self.bt, (self.bt + 1) * (1 - nwep)))
        up.append((self.ep, self.ep + nwep))

        # 3) the trainer functions
        # expression of batch and whole data feed:
        _ = T.arange((self.bt + 0) * self.bsz, (self.bt + 1) * self.bsz)
        bts = {x: self.x.take(_, -2, 'wrap'), z: self.z.take(_, -2, 'wrap')}
        dts = {x: self.x, z: self.z}
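        # take(..., mode='wrap') cycles the batch indices modulo the sample
        # count along axis -2, so the final batch of an epoch wraps around to
        # the beginning of the data instead of running past the end.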

        # each invocation sends one batch of training examples to the network,
        # calculates the total cost and tunes the parameters by gradient
        # descent.
        self.step = F([], cost, name="step", givens=bts, updates=up)

        # training error, training cost
        self.terr = F([], erro, name="terr", givens=dts)
        self.tcst = F([], cost, name="tcst", givens=dts)

        # weights, and parameters
        self.wsum = F([], wsum, name="wsum")
        self.gsup = F([], gsup, name="gsup", givens=dts)

        self.l2er = F([], l2er, name="l2er", givens=dts)
        # * -------- done with trainer functions -------- *

        # * -------- validation functions -------- *
        # enable validation label disruption (binary)?
        if self.vdr:
            _ = self.__trng__.binomial(self.v.shape, 1, self.vdr, dtype=FX)
            vts = {x: self.u, z: (self.v + _) % C(2.0, FX)}
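            # adding Bernoulli(vdr) noise and reducing modulo 2 flips each
            # binary validation label with probability vdr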
        else:
            vts = {x: self.u, z: self.v}
        self.verr = F([], erro, name="verr", givens=vts)

        # * ---------- logging and recording ---------- *
        hd, rm = [], ['step', 'gavg', 'nwgt', 'berr', 'bcst']
        for k, v in self.__dict__.items():
            if k.startswith('__') or k in rm:
                continue
            if isinstance(v, type(self.lmd)) and v.ndim < 1:
                hd.append((k, v.get_value))
            if isinstance(v, type(self.bsz)) and v.ndim < 1:
                hd.append((k, v.get_value))
            if isinstance(v, type(self.step)):
                hd.append((k, v))

        self.__head__ = hd
        self.__time__ = .0

        # the first record
        self.__hist__ = [self.__rpt__()]
        self.__gsup__ = self.__hist__[0]['gsup']
        self.__einf__ = self.__hist__[0]['terr']
        self.__nnt0__ = paint(self.nnt)

        # printing format
        self.__pfmt__ = (
            '{ep:04d}.{bt:03d}: {tcst:.2f} = {terr:.2f} + {lmd:.2e}*{wsum:.1f}'
            '|{verr:.2f}, {l2er:.2f}|, {gsup:.2e}, {lrt:.2e}')
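
        # an illustrative line rendered with this format (made-up numbers):
        #   0001.000: 0.69 = 0.68 + 1.00e-03*8.3|0.70, 0.25|, 1.23e-02, 1.00e-02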