import numpy
import theano
import theano.tensor as tensor


def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None,
                       ortho=True):
    """
    Affine transformation + point-wise nonlinearity
    """
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[_p(prefix, 'W')] = norm_weight(nin, nout, ortho=ortho)
    params[_p(prefix, 'b')] = numpy.zeros((nout,)).astype('float32')

    return params
def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
    """
    Gated Recurrent Unit (GRU)
    """
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']

    # W stacks the input-to-gate weights [Wr; Wz] for the reset and update
    # gates; b is the corresponding bias.
    W = numpy.concatenate([norm_weight(nin, dim),
                           norm_weight(nin, dim)], axis=1)
    params[_p(prefix, 'W')] = W
    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')

    # U stacks the hidden-to-gate weights [Ur; Uz].
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[_p(prefix, 'U')] = U

    # Wx, bx and Ux parameterise the candidate (proposed) hidden state.
    params[_p(prefix, 'Wx')] = norm_weight(nin, dim)
    params[_p(prefix, 'Ux')] = ortho_weight(dim)
    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
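# A minimal sketch (not part of the original file): the shapes produced by
# param_init_gru for a 100-dim input and a 200-dim hidden state. It assumes
# norm_weight, ortho_weight and _p are the helpers defined elsewhere in this
# file (_p joining prefix and name with an underscore).
def _demo_gru_params():
    params = param_init_gru({'dim_proj': 200}, {}, prefix='encoder',
                            nin=100, dim=200)
    print(params['encoder_W'].shape)   # (100, 400): [Wr; Wz] stacked on axis 1
    print(params['encoder_U'].shape)   # (200, 400): [Ur; Uz]
    print(params['encoder_Wx'].shape)  # (100, 200): candidate-state projection
    print(params['encoder_bx'].shape)  # (200,)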
def gru_layer(tparams, state_below, init_state, options, prefix='gru',
              mask=None, **kwargs):
    """
    Forward pass through the GRU
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    dim = tparams[_p(prefix, 'Ux')].shape[1]

    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)

    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # state_below is <steps, [samples,] data>
    # W is [Wr; Wz]: gate pre-activations from the input
    state_below_ = tensor.dot(state_below, tparams[_p(prefix, 'W')]) + \
        tparams[_p(prefix, 'b')]
    # Wx, bx are the weights of the input x on the next state proposal
    state_belowx = tensor.dot(state_below, tparams[_p(prefix, 'Wx')]) + \
        tparams[_p(prefix, 'bx')]

    U = tparams[_p(prefix, 'U')]    # U is [Ur; Uz]
    # Ux is the weight of the previous hidden state on the next state proposal
    Ux = tparams[_p(prefix, 'Ux')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))  # reset gate
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))  # update gate

        # seems to deviate slightly from the original paper:
        # r * (h Ux) is used instead of (r * h) Ux
        # preactx = xx_ + tensor.dot(r * h_, Ux)
        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)  # proposed hidden state

        h = u * h_ + (1. - u) * h
        # keep the previous state wherever the mask is 0 (padding steps)
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info=[init_state],
                                non_sequences=[tparams[_p(prefix, 'U')],
                                               tparams[_p(prefix, 'Ux')]],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=False,
                                strict=True)
    rval = [rval]
    return rval
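# A hedged usage sketch (not part of the original file): build a small Theano
# graph around gru_layer and run one forward pass. Converting the numpy
# parameters to shared variables inline here stands in for the init_tparams
# helper this codebase normally uses.
def _demo_gru_layer():
    options = {'dim_proj': 64}
    params = param_init_gru(options, {}, prefix='gru', nin=32, dim=64)
    tparams = {k: theano.shared(v, name=k) for k, v in params.items()}

    x = tensor.tensor3('x', dtype='float32')  # <steps, samples, features>
    # init_state=None lets gru_layer allocate a zero initial state
    h = gru_layer(tparams, x, None, options, prefix='gru')[0]
    f = theano.function([x], h)

    out = f(numpy.random.randn(5, 3, 32).astype('float32'))
    print(out.shape)  # (5, 3, 64): one hidden state per timestep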
def fflayer(tparams, state_below, options, prefix='rconv',
            activ='lambda x: tensor.tanh(x)', **kwargs):
    """
    Feedforward pass
    """
    # activ is passed as a string and eval'd to obtain the nonlinearity
    return eval(activ)(tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                       tparams[_p(prefix, 'b')])
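# A minimal sketch (not part of the original file): initialise and apply a
# feedforward layer, passing the activation as an eval'd string as fflayer
# expects.
def _demo_fflayer():
    options = {'dim_proj': 16}
    params = param_init_fflayer(options, {}, prefix='ff', nin=8, nout=16)
    tparams = {k: theano.shared(v, name=k) for k, v in params.items()}

    x = tensor.matrix('x', dtype='float32')
    y = fflayer(tparams, x, options, prefix='ff',
                activ='lambda x: tensor.tanh(x)')
    f = theano.function([x], y)
    print(f(numpy.random.randn(4, 8).astype('float32')).shape)  # (4, 16)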