class LNLSTM(object):
    """LSTM layer with layer normalization on the gate pre-activations.

    A single Linear maps the concatenation [x_t, h_{t-1}] (size
    input_size + layer_size) to all four gate pre-activations at once
    (size 4 * layer_size), followed by one LayerNormalization over that
    concatenated block.  Dropout (rate ``p``) is applied to the candidate
    activation g_t only, not to i/f/o.

    When ``persistent`` is True, the initial cell/hidden state lives in
    shared variables of fixed ``batch_size`` and is updated after every
    call (via ``self.updates``); otherwise the state starts at zeros sized
    from the input's runtime batch dimension.
    """

    def __init__(self, input_size, layer_size, batch_size=1, p=0.0, name="",
                 activation=T.tanh, inner_activation=T.nnet.sigmoid,
                 weight_init=Uniform(), persistent=False):
        # Persistent state buffers; only consulted when persistent=True.
        self.h = theano.shared(
            numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
            name=name + "_h_init")
        self.c = theano.shared(
            numpy.zeros((batch_size, layer_size), dtype=theano.config.floatX),
            name=name + "_c_init")
        # One affine map + layer norm producing all four gates (i, f, o, g).
        self.preact = Sequential([
            Linear(input_size + layer_size, layer_size * 4,
                   weight_init=weight_init, name=name + "_ifog"),
            LayerNormalization(layer_size * 4, name=name + "_ln"),
        ])
        # FIX: the original assigned `self.params = []` and immediately
        # overwrote it; the dead assignment has been removed.
        self.params = self.preact.params
        self.dropout = Dropout(p)
        self.updates = []
        self.activation = activation
        self.inner_activation = inner_activation
        self.batch_size = batch_size
        self.layer_size = layer_size
        self.persistent = persistent

    def __call__(self, x):
        """Scan the LSTM over ``x``.

        Assumes x is (time, batch, features) — the zero state is sized from
        ``x.shape[1]`` — TODO confirm against callers.  Returns the hidden
        state sequence h; scan updates are exposed on ``self.updates``.
        """
        if self.persistent:
            outputs_info = [self.c, self.h]
        else:
            # Size the zero initial state from the runtime batch dimension.
            outputs_info = [T.zeros((x.shape[1], self.layer_size)),
                            T.zeros((x.shape[1], self.layer_size))]
        [c, h], upd = theano.scan(self.step, x, outputs_info=outputs_info)
        if self.persistent:
            # Carry the final time step's state over to the next call.
            upd[self.c] = c[-1]
            upd[self.h] = h[-1]
        # FIX: build the OrderedDict directly instead of creating an empty
        # one and calling .update() on it.
        self.updates = OrderedDict(upd)
        return h

    def step(self, x_t, c_tm1, h_tm1):
        """One recurrence step; returns (c_t, h_t)."""
        ifog = self.preact(T.concatenate([x_t, h_tm1], axis=1))
        i_t, f_t, o_t, g_t = self._split(ifog)
        # Dropout only on the candidate g_t (a form of recurrent dropout).
        c_t = f_t * c_tm1 + i_t * self.dropout(g_t)
        h_t = o_t * self.activation(c_t)
        return c_t, h_t

    def set_phase(self, train):
        """Propagate train/inference phase to the dropout sub-layer."""
        self.dropout.set_phase(train)

    def reset(self):
        """Zero the persistent state buffers (no-op when not persistent)."""
        if self.persistent:
            self.h.set_value(numpy.zeros_like(self.h.get_value(),
                                              dtype=theano.config.floatX))
            self.c.set_value(numpy.zeros_like(self.c.get_value(),
                                              dtype=theano.config.floatX))

    def _split(self, x):
        """Slice the 4*layer_size pre-activation into activated i, f, o, g."""
        i = x[:, 0 * self.layer_size:1 * self.layer_size]
        f = x[:, 1 * self.layer_size:2 * self.layer_size]
        o = x[:, 2 * self.layer_size:3 * self.layer_size]
        g = x[:, 3 * self.layer_size:4 * self.layer_size]
        return (self.inner_activation(i), self.inner_activation(f),
                self.inner_activation(o), self.activation(g))
class HighwayConvolution1d(object):
    """Highway-gated 1-d convolutional block.

    A single convolution produces three stacked feature maps of
    ``input_size`` channels each — transform gate i, carry gate f, and
    candidate g — followed by BatchNormalization.  The output mixes the
    input with the transformed features:

        y = sigmoid(f) * x + sigmoid(i) * dropout(tanh(g))

    Only kernel_size == 3 is supported.
    """

    def __init__(self, kernel_size, input_size, causal=True, dilation=1,
                 weight_init=Uniform(), name="", keepdims=False, p=0.0):
        # FIX: removed a dead local import of LayerNormalization (the block
        # uses BatchNormalization; the imported name was never referenced).
        # FIX: validate with an explicit exception rather than `assert`,
        # which is stripped under `python -O`.
        if kernel_size != 3:
            raise ValueError("HighwayConvolution1d requires kernel_size == 3")
        self.conv = Sequential([
            Convolution1d(kernel_size, input_size * 3, input_size,
                          pad=dilation, causal=causal, dilation=dilation,
                          weight_init=weight_init, name=name,
                          keepdims=keepdims),
            BatchNormalization(input_size * 3, name=name + "_bn"),
        ])
        self.dropout = Dropout(p)
        self.input_size = input_size
        self.params = self.conv.params

    def __call__(self, x):
        """Apply the gated convolution; output has the same shape as ``x``."""
        i, f, g = self._split(self.conv(x))
        y = T.nnet.sigmoid(f) * x + T.nnet.sigmoid(i) * self.dropout(T.tanh(g))
        return y

    def _split(self, x):
        """Slice the 3*input_size channel axis into (i, f, g)."""
        return x[:, 0 * self.input_size:1 * self.input_size], \
               x[:, 1 * self.input_size:2 * self.input_size], \
               x[:, 2 * self.input_size:3 * self.input_size]

    def set_phase(self, train):
        """Propagate train/inference phase to the dropout sub-layer."""
        self.dropout.set_phase(train)