def __call__(self, x, h, c):
    """Advance an LSTM cell whose input gate is tied to the forget gate.

    The candidate is mixed into the cell state with weight ``1 - ft``
    instead of a separately learned input gate.

    Args:
        x: Input for the current step.
        h: Output of the previous step.
        c: Cell state of the previous step.

    Returns:
        tuple: ``(h, c)``, the new output and the new cell state.
    """
    ft = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
    ct = tanh.tanh(self.W_cx(x) + self.W_ch(h))
    ot = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
    # The original expression carried a stray ")" after ``(1 - ft)``,
    # which made the whole definition a syntax error.
    c = ft * c + (1 - ft) * ct
    h = ot * tanh.tanh(c)
    return h, c
def forward(self, x):
    """Run one peephole-LSTM step and store the new ``c`` and ``h``.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    gates = self.upward(x)
    if self.h is not None:
        gates += self.lateral(self.h)

    if self.c is None:
        # First step: start from an all-zero cell state on this link's device.
        xp = self.xp
        with chainer.using_device(self.device):
            zero_state = xp.zeros((len(x), self.state_size), dtype=x.dtype)
            self.c = variable.Variable(zero_state)

    # The four gate pre-activations are interleaved along the last axis.
    gates = reshape.reshape(gates, (len(gates), gates.shape[1] // 4, 4))
    a, i, f, o = (
        reshape.reshape(part, part.shape[:2])
        for part in split_axis.split_axis(gates, 4, 2)
    )

    # Input and forget gates peek at the previous cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    candidate = tanh.tanh(a)
    input_gate = sigmoid.sigmoid(i + peep_in_i)
    forget_gate = sigmoid.sigmoid(f + peep_in_f)
    self.c = candidate * input_gate + forget_gate * self.c

    # The output gate peeks at the freshly updated cell state.
    output_gate = sigmoid.sigmoid(o + self.peep_o(self.c))
    self.h = output_gate * tanh.tanh(self.c)
    return self.h
def __call__(self, x):
    """Run one peephole-LSTM step and store the new ``c`` and ``h``.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    gates = self.upward(x)
    if self.h is not None:
        gates += self.lateral(self.h)

    if self.c is None:
        # First step: start from an all-zero cell state.
        xp = self.xp
        zero_state = xp.zeros((x.shape[0], self.state_size), dtype=x.dtype)
        self.c = variable.Variable(zero_state, volatile="auto")

    # The four gate pre-activations are interleaved along the last axis.
    gates = reshape.reshape(
        gates, (len(gates.data), gates.shape[1] // 4, 4))
    a, i, f, o = (
        reshape.reshape(part, (len(part.data), part.shape[1]))
        for part in split_axis.split_axis(gates, 4, 2)
    )

    # Input and forget gates peek at the previous cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    candidate = tanh.tanh(a)
    input_gate = sigmoid.sigmoid(i + peep_in_i)
    forget_gate = sigmoid.sigmoid(f + peep_in_f)
    self.c = candidate * input_gate + forget_gate * self.c

    # The output gate peeks at the freshly updated cell state.
    output_gate = sigmoid.sigmoid(o + self.peep_o(self.c))
    self.h = output_gate * tanh.tanh(self.c)
    return self.h
def forward(self, x):
    """Run one peephole-LSTM step and store the new ``c`` and ``h``.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    pre_act = self.upward(x)
    if self.h is not None:
        pre_act += self.lateral(self.h)

    if self.c is None:
        # No cell state yet: create a zero one on this link's device.
        xp = self.xp
        with chainer.using_device(self.device):
            self.c = variable.Variable(
                xp.zeros((len(x), self.state_size), dtype=x.dtype))

    # Split the interleaved pre-activations into the four gate inputs.
    pre_act = reshape.reshape(
        pre_act, (len(pre_act), pre_act.shape[1] // 4, 4))
    pieces = split_axis.split_axis(pre_act, 4, 2)
    a, i, f, o = (reshape.reshape(p, p.shape[:2]) for p in pieces)

    # Peephole terms for input/forget gates use the previous cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    a = tanh.tanh(a)
    i = sigmoid.sigmoid(i + peep_in_i)
    f = sigmoid.sigmoid(f + peep_in_f)
    self.c = a * i + f * self.c

    # Peephole term for the output gate uses the updated cell state.
    o = sigmoid.sigmoid(o + self.peep_o(self.c))
    self.h = o * tanh.tanh(self.c)
    return self.h
def __call__(self, *cshsx):
    """Returns new cell state and output of Child-Sum TreeLSTM.

    Args:
        cshsx (list of :class:`~chainer.Variable`): Positional arguments
            laid out as all child cell vectors, then all child output
            vectors, then a single input vector (which may be ``None``).

    Returns:
        tuple of ~chainer.Variable: Returns :math:`(c_{new}, h_{new})`,
        where :math:`c_{new}` represents new cell state vector, and
        :math:`h_{new}` is new output vector.

    """
    half = len(cshsx) // 2
    cs = cshsx[:half]
    hs = cshsx[half:-1]
    x = cshsx[-1]
    assert len(cshsx) % 2 == 1
    assert len(cs) == len(hs)

    if x is None:
        # Borrow batch size and dtype from the first available child
        # state (cell states first, then outputs) to build a zero input.
        base = next((c for c in cs if c is not None), None)
        if base is None:
            base = next((h for h in hs if h is not None), None)
        if base is None:
            raise ValueError('All inputs (cs, hs, x) are None.')
        batchsize, dtype = base.shape[0], base.dtype
        x = self.xp.zeros((batchsize, self.in_size), dtype=dtype)

    W_x_in = self.W_x(x)
    W_x_aio_in, W_x_f_in = split_axis.split_axis(
        W_x_in, [3 * self.state_size], axis=1)

    if len(hs) == 0:
        # Leaf node: there is no child state to forget or sum.
        a, i, o = split_axis.split_axis(W_x_aio_in, 3, axis=1)
        c = sigmoid.sigmoid(i) * tanh.tanh(a)
        h = sigmoid.sigmoid(o) * tanh.tanh(c)
        return c, h

    node_shape = (x.shape[0], self.state_size)
    hs = self._pad_zero_nodes(hs, node_shape, dtype=x.dtype)
    cs = self._pad_zero_nodes(cs, node_shape, dtype=x.dtype)

    aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in

    # Apply W_h_f to all children in one call by stacking them along the
    # batch axis, then lay the per-child results side by side.
    stacked_f = self.W_h_f(concat.concat(hs, axis=0))
    per_child_f = split_axis.split_axis(stacked_f, len(hs), axis=0)
    W_h_fs_in = concat.concat(per_child_f, axis=1)
    f_in = W_h_fs_in + concat.concat([W_x_f_in] * len(hs), axis=1)

    tree_lstm_in = concat.concat([aio_in, f_in], axis=1)
    return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
def __call__(self, x, h, c):
    """Advance a peephole LSTM cell whose input gate is ``1 - forget``.

    Args:
        x: Input for the current step.
        h: Output of the previous step.
        c: Cell state of the previous step.

    Returns:
        tuple: ``(h, c)``, the new output and the new cell state.
    """
    # Forget gate sees the previous cell state through W_fc.
    forget = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
    candidate = tanh.tanh(self.W_cx(x) + self.W_ch(h))
    new_c = forget * c + (1 - forget) * candidate
    # Output gate sees the updated cell state through W_oc.
    output = sigmoid.sigmoid(
        self.W_ox(x) + self.W_oh(h) + self.W_oc(new_c))
    new_h = output * tanh.tanh(new_c)
    return new_h, new_c
def __call__(self, x):
    """Run one peephole-LSTM step and store the new ``c`` and ``h``.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    gates = self.upward(x)
    if self.h is not None:
        gates += self.lateral(self.h)

    if self.c is None:
        # First step: start from an all-zero cell state.
        xp = self.xp
        zero_state = xp.zeros(
            (len(x.data), self.state_size), dtype=x.data.dtype)
        self.c = variable.Variable(zero_state, volatile='auto')

    # The four gate pre-activations are interleaved along the last axis.
    gates = reshape.reshape(
        gates, (len(gates.data), gates.data.shape[1] // 4, 4))
    a, i, f, o = (
        reshape.reshape(part, (len(part.data), part.data.shape[1]))
        for part in split_axis.split_axis(gates, 4, 2)
    )

    # Input and forget gates peek at the previous cell state.
    peep_in_i = self.peep_i(self.c)
    peep_in_f = self.peep_f(self.c)
    candidate = tanh.tanh(a)
    input_gate = sigmoid.sigmoid(i + peep_in_i)
    forget_gate = sigmoid.sigmoid(f + peep_in_f)
    self.c = candidate * input_gate + forget_gate * self.c

    # The output gate peeks at the freshly updated cell state.
    output_gate = sigmoid.sigmoid(o + self.peep_o(self.c))
    self.h = output_gate * tanh.tanh(self.c)
    return self.h
def __call__(self, x, h, c):
    """Advance an LSTM cell with separate forget, input and output gates.

    Args:
        x: Input for the current step.
        h: Output of the previous step.
        c: Cell state of the previous step.

    Returns:
        tuple: ``(h, c)``, the new output and the new cell state.
    """
    forget = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h))
    write = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h))
    candidate = tanh.tanh(self.W_cx(x) + self.W_ch(h))
    output = sigmoid.sigmoid(self.W_ox(x) + self.W_oh(h))
    new_c = forget * c + write * candidate
    new_h = output * tanh.tanh(new_c)
    return new_h, new_c
def __call__(self, x, h, c):
    """Advance a peephole LSTM cell by one step.

    Args:
        x: Input for the current step.
        h: Output of the previous step.
        c: Cell state of the previous step.

    Returns:
        tuple: ``(h, c)``, the new output and the new cell state.
    """
    # Forget and input gates see the previous cell state.
    forget = sigmoid.sigmoid(self.W_fx(x) + self.W_fh(h) + self.W_fc(c))
    write = sigmoid.sigmoid(self.W_ix(x) + self.W_ih(h) + self.W_ic(c))
    candidate = tanh.tanh(self.W_cx(x) + self.W_ch(h))
    new_c = forget * c + write * candidate
    # The output gate sees the updated cell state.
    output = sigmoid.sigmoid(
        self.W_ox(x) + self.W_oh(h) + self.W_oc(new_c))
    new_h = output * tanh.tanh(new_c)
    return new_h, new_c
def forward(self, *cshsx):
    """Returns new cell state and output of Child-Sum TreeLSTM.

    Args:
        cshsx (list of :class:`~chainer.Variable`): Positional arguments
            laid out as all child cell vectors, then all child output
            vectors, then a single input vector (which may be ``None``).

    Returns:
        tuple of ~chainer.Variable: Returns :math:`(c_{new}, h_{new})`,
        where :math:`c_{new}` represents new cell state vector, and
        :math:`h_{new}` is new output vector.

    """
    n_children = len(cshsx) // 2
    cs = cshsx[:n_children]
    hs = cshsx[n_children:-1]
    x = cshsx[-1]
    assert len(cshsx) % 2 == 1
    assert len(cs) == len(hs)

    if x is None:
        # Build a zero input shaped after the first non-None child state,
        # preferring cell states over outputs.
        present_cs = [c for c in cs if c is not None]
        present_hs = [h for h in hs if h is not None]
        if present_cs:
            base = present_cs[0]
        elif present_hs:
            base = present_hs[0]
        else:
            raise ValueError('All inputs (cs, hs, x) are None.')
        x = self.xp.zeros((base.shape[0], self.in_size), dtype=base.dtype)

    W_x_in = self.W_x(x)
    W_x_aio_in, W_x_f_in = split_axis.split_axis(
        W_x_in, [3 * self.state_size], axis=1)

    if len(hs) == 0:
        # Leaf node: only the input contributes.
        a, i, o = split_axis.split_axis(W_x_aio_in, 3, axis=1)
        c = sigmoid.sigmoid(i) * tanh.tanh(a)
        h = sigmoid.sigmoid(o) * tanh.tanh(c)
        return c, h

    state_shape = (x.shape[0], self.state_size)
    hs = self._pad_zero_nodes(hs, state_shape, dtype=x.dtype)
    cs = self._pad_zero_nodes(cs, state_shape, dtype=x.dtype)

    aio_in = self.W_h_aio(sum(hs)) + W_x_aio_in

    # One W_h_f call over all children stacked on the batch axis; the
    # result is split back per child and concatenated feature-wise.
    all_children_f = self.W_h_f(concat.concat(hs, axis=0))
    W_h_fs_in = concat.concat(
        split_axis.split_axis(all_children_f, len(hs), axis=0), axis=1)
    tiled_x_f = concat.concat([W_x_f_in] * len(hs), axis=1)
    f_in = W_h_fs_in + tiled_x_f

    tree_lstm_in = concat.concat([aio_in, f_in], axis=1)
    return tree_lstm.tree_lstm(*(cs + (tree_lstm_in, )))
def forward(self, x, y):
    """Updates the internal state and returns the LSTM outputs.

    In addition to the usual LSTM cell update, ``tanh(self.w_y(y))`` is
    added to the new cell state.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.
        y: Auxiliary input passed through ``self.w_y`` and injected into
            the cell state.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    Raises:
        TypeError: If the batch size of ``x`` is larger than the batch
            size of the stored state ``h``.

    """
    if self.upward.has_uninitialized_params:
        in_size = x.size // x.shape[0]
        self.upward._initialize_params(in_size)
        self._initialize_params()

    batch = x.shape[0]
    lstm_in = self.upward(x)
    # An empty batch contributes no recurrent term.
    if self.h is not None and batch != 0:
        h_size = self.h.shape[0]
        if h_size < batch:
            msg = ('The batch size of x must be equal to or less than the '
                   'size of the previous state h.')
            raise TypeError(msg)
        if h_size > batch:
            # Only the leading ``batch`` rows of h feed this step.
            # NOTE(review): the remaining rows are not carried into the
            # new ``self.h`` below (and ``self.c`` is not split either) --
            # confirm whether shrinking batches are really supported.
            h_update, _ = split_axis.split_axis(self.h, [batch], axis=0)
            lstm_in += self.lateral(h_update)
        else:
            lstm_in += self.lateral(self.h)

    if self.c is None:
        xp = self.xp
        self.c = variable.Variable(
            xp.zeros((batch, self.state_size), dtype=x.dtype),
            volatile='auto')

    # Gate pre-activations are interleaved along axis 2 after the reshape.
    r = reshape.reshape(
        lstm_in,
        (len(lstm_in.data), lstm_in.data.shape[1] // 4, 4)
        + lstm_in.data.shape[2:])
    a, i, f, o = [r[:, :, k] for k in range(4)]

    a = tanh.tanh(a)
    i = sigmoid.sigmoid(i)
    f = sigmoid.sigmoid(f)
    o = sigmoid.sigmoid(o)
    # Fixed: the original called the ``tanh`` module itself
    # (``tanh(self.w_y(y))``) although the rest of this function uses
    # ``tanh.tanh``; calling the module raises TypeError.
    self.c = a * i + f * self.c + tanh.tanh(self.w_y(y))
    self.h = o * tanh.tanh(self.c)
    return self.h
def __call__(self, h, x):
    """Compute the next hidden state from ``h`` and the input ``x``.

    A proposal is built from two multiplicative interactions with ``h``
    and then mixed with ``h`` using a hard-sigmoid gate.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    projected_x = self.W_xh(x)
    inner = tanh.tanh(self.W_zxh(projected_x * h))
    proposal = sigmoid.sigmoid(self.W_go(inner * h))
    gate = hard_sigmoid(self.W_xz(x) + self.W_hz(h))
    # gate == 0 keeps h untouched, gate == 1 replaces it by the proposal.
    return (1 - gate) * h + gate * proposal
def _one_directional_loop(di):
    """Run the current layer over the sequence in one direction.

    Relies on names from the enclosing scope (``xs_next``, ``hx``,
    ``layer``, ``direction``, ``activation``, the weight/bias lists and
    ``dropout_ratio``).

    Args:
        di: 0 for the forward RNN, 1 for the backward RNN.

    Returns:
        tuple: The final hidden state and the list of per-step outputs.
    """
    if di == 0:
        steps = xs_next
    else:
        steps = reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    outputs = []
    for x in steps:
        batch = x.shape[0]
        # When this step has fewer rows than h, only the leading rows are
        # updated; the rest is set aside and re-attached afterwards.
        h_rest = None
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        from_input = linear.linear(x, xws[layer_idx], xbs[layer_idx])
        from_state = linear.linear(h, hws[layer_idx], hbs[layer_idx])
        rnn_in = from_input + from_state

        if activation == 'tanh':
            h_bar = tanh.tanh(rnn_in)
        elif activation == 'relu':
            h_bar = relu.relu(rnn_in)

        if h_rest is None:
            h = h_bar
        else:
            h = concat.concat([h_bar, h_rest], axis=0)
        outputs.append(h_bar)
    return h, outputs
def _one_directional_loop(di):
    """Run the current GRU layer over the sequence in one direction.

    Relies on names from the enclosing scope (``xs_next``, ``hx``,
    ``layer``, ``direction``, the weight/bias lists and
    ``dropout_ratio``).

    Args:
        di: 0 for the forward GRU, 1 for the backward GRU.

    Returns:
        tuple: The final hidden state and the list of per-step outputs.
    """
    if di == 0:
        steps = xs_next
    else:
        steps = reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    outputs = []
    for x in steps:
        batch = x.shape[0]
        # When this step has fewer rows than h, only the leading rows are
        # updated; the rest is set aside and re-attached afterwards.
        h_rest = None
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        # One linear call per source yields the reset, update and
        # candidate pre-activations stacked along axis 1.
        gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
        gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])
        W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
        U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

        reset = sigmoid.sigmoid(W_r_x + U_r_h)
        update = sigmoid.sigmoid(W_z_x + U_z_h)
        candidate = tanh.tanh(W_x + reset * U_x)
        h_bar = (1 - update) * candidate + update * h

        if h_rest is None:
            h = h_bar
        else:
            h = concat.concat([h_bar, h_rest], axis=0)
        outputs.append(h_bar)
    return h, outputs
def _one_directional_loop(di):
    """Unroll the current RNN layer along one direction of the sequence.

    Uses ``xs_next``, ``hx``, ``layer``, ``direction``, ``activation``,
    ``dropout_ratio`` and the weight/bias lists from the enclosing scope.

    Args:
        di: 0 selects the forward RNN, 1 the backward RNN.

    Returns:
        tuple: The last hidden state and a list with each step's output.
    """
    sequence = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = hx[layer_idx]
    step_outputs = []
    for x in sequence:
        batch = x.shape[0]
        # Rows of h beyond this step's batch size are parked in h_rest.
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        rnn_in = (
            linear.linear(x, xws[layer_idx], xbs[layer_idx])
            + linear.linear(h, hws[layer_idx], hbs[layer_idx])
        )
        if activation == 'tanh':
            h_bar = tanh.tanh(rnn_in)
        elif activation == 'relu':
            h_bar = relu.relu(rnn_in)

        # Re-attach the parked rows so h keeps its full batch size.
        h = (h_bar if h_rest is None
             else concat.concat([h_bar, h_rest], axis=0))
        step_outputs.append(h_bar)
    return h, step_outputs
def __call__(self, h, x):
    """Compute the next hidden state from ``h`` and the input ``x``.

    A proposal is built from two multiplicative interactions with ``h``
    and then blended with ``h`` by ``linear_interpolate`` using a
    hard-sigmoid gate.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    projected_x = self.W_xh(x)
    inner = tanh.tanh(self.W_zxh(projected_x * h))
    proposal = sigmoid.sigmoid(self.W_go(inner * h))
    gate = hard_sigmoid(self.W_xz(x) + self.W_hz(h))
    return linear_interpolate(gate, proposal, h)
def f(x, h, c, w, b):
    """Apply one step of a plain RNN cell.

    ``activation`` comes from the enclosing scope. ``c`` is accepted but
    not used.

    Args:
        x: Input for the current step.
        h: Previous hidden state.
        c: Unused.
        w: Pair ``(input weight, hidden weight)``.
        b: Pair ``(input bias, hidden bias)``.

    Returns:
        tuple: ``(new hidden state, None)`` for ``'tanh'`` or ``'relu'``;
        ``None`` for any other ``activation`` value.
    """
    (xw, hw), (xb, hb) = w, b
    pre_activation = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
    if activation == 'tanh':
        new_h = tanh.tanh(pre_activation)
    elif activation == 'relu':
        new_h = relu.relu(pre_activation)
    else:
        return None
    return new_h, None
def __call__(self, x):
    """Run one LSTM step, passing both states through ``zoneout``.

    Args:
        x (~chainer.Variable): A new batch from the input sequence.

    Returns:
        ~chainer.Variable: Outputs of updated LSTM units.

    """
    def _zero_state():
        # All-zero state with one row per row of x, created on this
        # link's device.
        xp = self.xp
        with cuda.get_device(self._device_id):
            return variable.Variable(
                xp.zeros((len(x.data), self.state_size),
                         dtype=x.data.dtype),
                volatile='auto')

    gates = self.upward(x)
    if self.h is None:
        self.h = _zero_state()
    else:
        gates += self.lateral(self.h)
    if self.c is None:
        self.c = _zero_state()

    # The four gate pre-activations are interleaved along the last axis.
    gates = reshape.reshape(
        gates, (len(gates.data), gates.data.shape[1] // 4, 4))
    a, i, f, o = (
        reshape.reshape(part, (len(part.data), self.state_size))
        for part in split_axis.split_axis(gates, 4, 2)
    )

    c_tmp = tanh.tanh(a) * sigmoid.sigmoid(i) + sigmoid.sigmoid(f) * self.c
    self.c = zoneout.zoneout(self.c, c_tmp, self.c_ratio, self.train)
    # The output candidate is computed from c_tmp, not from the zoned-out
    # self.c stored above.
    h_tmp = sigmoid.sigmoid(o) * tanh.tanh(c_tmp)
    self.h = zoneout.zoneout(self.h, h_tmp, self.h_ratio, self.train)
    return self.h
def __call__(self, x):
    """Run one step of a stateful peephole LSTM with a coupled input gate.

    The candidate is written into the cell with weight ``1 - ft``. State is
    kept in ``self.h`` and ``self.c``; recurrent and forget-peephole terms
    are skipped until both are set.

    Fixes relative to the original:

    * ``h`` was an undefined name; the stored ``self.h`` is used.
    * ``none`` was an undefined name; it is now ``None``.
    * ``self.c`` was never assigned on the first step, and on later steps
      the accumulation (``self.c += ft * c``) gated the new candidate
      instead of the previous cell state. The update is now
      ``c = (1 - ft) * ct + ft * self.c``, as in the stateless
      ``__call__(self, x, h, c)`` variant in this file.
    * The output peephole ``W_oc`` read the candidate ``ct``; it now reads
      the updated cell state, again as in the stateless variant.

    Args:
        x: Input for the current step.

    Returns:
        The new output, also stored in ``self.h``.
    """
    ft = self.W_fx(x)
    ct = self.W_cx(x)
    ot = self.W_ox(x)
    if self.h is not None and self.c is not None:
        ft += self.W_fh(self.h) + self.W_fc(self.c)
        ct += self.W_ch(self.h)
        ot += self.W_oh(self.h)

    ft = sigmoid.sigmoid(ft)
    ct = tanh.tanh(ct)

    c = (1 - ft) * ct
    if self.c is not None:
        # Carry over the previous cell state, scaled by the forget gate.
        c = c + ft * self.c
    self.c = c

    ot = sigmoid.sigmoid(ot + self.W_oc(self.c))
    self.h = ot * tanh.tanh(self.c)
    return self.h
def __call__(self, x):
    """Run one step of a stateful LSTM.

    State is kept in ``self.h`` and ``self.c``. Recurrent terms are added
    only once ``self.h`` is set, and the previous cell state is carried
    over only once ``self.c`` is set.

    Fixes relative to the original: the recurrent terms referenced an
    undefined name ``h`` (now ``self.h``), and the cell-state check
    compared against the undefined name ``none`` (now ``None``).

    Args:
        x: Input for the current step.

    Returns:
        The new output, also stored in ``self.h``.
    """
    ft = self.W_fx(x)
    it = self.W_ix(x)
    ct = self.W_cx(x)
    ot = self.W_ox(x)
    if self.h is not None:
        ft += self.W_fh(self.h)
        it += self.W_ih(self.h)
        ct += self.W_ch(self.h)
        ot += self.W_oh(self.h)

    ft = sigmoid.sigmoid(ft)
    it = sigmoid.sigmoid(it)
    ct = tanh.tanh(ct)
    ot = sigmoid.sigmoid(ot)

    c = it * ct
    if self.c is not None:
        # Carry over the previous cell state, scaled by the forget gate.
        c = c + ft * self.c
    self.c = c
    self.h = ot * tanh.tanh(self.c)
    return self.h
def __call__(self, x):
    """Run one stateful GRU step and store the result in ``self.h``.

    Args:
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    prev = self.h
    update = self.W_z(x)
    candidate = self.W(x)
    if prev is not None:
        # Recurrent contributions exist only once a state is stored.
        reset = sigmoid.sigmoid(self.W_r(x) + self.U_r(prev))
        update += self.U_z(prev)
        candidate += self.U(reset * prev)

    update = sigmoid.sigmoid(update)
    candidate = tanh.tanh(candidate)

    blended = update * candidate
    if prev is not None:
        blended += (1 - update) * prev
    self.h = blended
    return self.h
def _gru(x, h, c, w, b):
    """Apply one GRU step given six weight and six bias arrays.

    Entries 0-2 of ``w``/``b`` act on the input and entries 3-5 on the
    hidden state, each ordered reset, update, candidate. ``c`` is accepted
    but not used.

    Args:
        x: Input for the current step.
        h: Previous hidden state.
        c: Unused.
        w: Indexable of six weight arrays.
        b: Indexable of six bias arrays.

    Returns:
        tuple: ``(new hidden state, None)``.
    """
    # Stack the three per-gate parameters so each source needs one linear.
    input_w = concat.concat([w[k] for k in (0, 1, 2)], axis=0)
    hidden_w = concat.concat([w[k] for k in (3, 4, 5)], axis=0)
    input_b = concat.concat([b[k] for k in (0, 1, 2)], axis=0)
    hidden_b = concat.concat([b[k] for k in (3, 4, 5)], axis=0)

    from_x = linear.linear(x, input_w, input_b)
    from_h = linear.linear(h, hidden_w, hidden_b)
    W_r_x, W_z_x, W_x = split_axis.split_axis(from_x, 3, axis=1)
    U_r_h, U_z_h, U_x = split_axis.split_axis(from_h, 3, axis=1)

    reset = sigmoid.sigmoid(W_r_x + U_r_h)
    update = sigmoid.sigmoid(W_z_x + U_z_h)
    candidate = tanh.tanh(W_x + reset * U_x)
    new_h = (1 - update) * candidate + update * h
    return new_h, None
def __call__(self, a_list, state, batch_size, xp):
    """Compute an attention context over ``a_list``.

    Each annotation is scored against ``state['h2']``; the exponentiated
    scores, divided by their sum, weight the annotations.

    Args:
        a_list: Annotation vectors to attend over.
        state: Mapping that provides the query under the key ``'h2'``.
        batch_size: Number of rows in the batch.
        xp: Array module used to allocate the zero accumulators.

    Returns:
        tuple: ``(context, e_list, sum_e)``, the weighted context, the
        per-annotation exponentiated scores and their sum.
    """
    e_list = []
    sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
    for annotation in a_list:
        joined = array.concat.concat((annotation, state['h2']), axis=1)
        hidden = tanh(self.av(joined))
        score = exp(self.vw(hidden))
        e_list.append(score)
        sum_e = sum_e + score

    context_shape = (batch_size, self.hidden_size)
    context = xp.zeros(context_shape, dtype=xp.float32)
    for annotation, e in zip(a_list, e_list):
        e /= sum_e
        weighted = reshape(batch_matmul(annotation, e), context_shape)
        context = context + weighted
    return context, e_list, sum_e
def faster_call(self, h, x):
    """GRU step that uses fused projections for the gate pre-activations.

    ``self.W_r_z_h`` yields the reset, update and candidate terms for the
    input in one call, and ``self.U_r_z`` yields the reset and update
    terms for the hidden state.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    units = self.n_units
    fused_x = self.W_r_z_h(x)
    r_x, z_x, h_x = split_axis(fused_x, (units, units * 2), axis=1)
    # Each slice must be exactly n_units wide.
    assert r_x.data.shape[1] == self.n_units
    assert z_x.data.shape[1] == self.n_units
    assert h_x.data.shape[1] == self.n_units

    fused_h = self.U_r_z(h)
    r_h, z_h = split_axis(fused_h, (self.n_units, ), axis=1)

    reset = sigmoid.sigmoid(r_x + r_h)
    update = sigmoid.sigmoid(z_x + z_h)
    candidate = tanh.tanh(h_x + self.U(reset * h))
    return (1 - update) * h + update * candidate
def __call__(self, x):
    """Run one stateful GRU step and store the result in ``self.h``.

    Args:
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    prev = self.h
    update = self.W_z(x)
    candidate = self.W(x)
    if prev is not None:
        # Recurrent contributions exist only once a state is stored.
        reset = sigmoid.sigmoid(self.W_r(x) + self.U_r(prev))
        update += self.U_z(prev)
        candidate += self.U(reset * prev)

    update = sigmoid.sigmoid(update)
    candidate = tanh.tanh(candidate)

    if prev is None:
        # No previous state to blend with on the first step.
        self.h = update * candidate
    else:
        self.h = linear_interpolate.linear_interpolate(
            update, candidate, prev)
    return self.h
def forward(self, x):
    """Run one stateful GRU step and store the result in ``self.h``.

    Args:
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    has_state = self.h is not None
    z_pre = self.W_z(x)
    h_bar_pre = self.W(x)
    if has_state:
        # Recurrent contributions exist only once a state is stored.
        r = sigmoid.sigmoid(self.W_r(x) + self.U_r(self.h))
        z_pre += self.U_z(self.h)
        h_bar_pre += self.U(r * self.h)

    z = sigmoid.sigmoid(z_pre)
    h_bar = tanh.tanh(h_bar_pre)

    if has_state:
        h_new = linear_interpolate.linear_interpolate(z, h_bar, self.h)
    else:
        # No previous state to blend with on the first step.
        h_new = z * h_bar
    self.h = h_new
    return self.h
def __call__(self, x):
    """Run one stateful GRU step that uses hard-sigmoid gates.

    Unlike the sigmoid-gated variants, ``z`` here weights the previous
    state and the candidate gets the complementary weight.

    Args:
        x: Input for the current step.

    Returns:
        The new hidden state, also stored in ``self.h``.
    """
    prev = self.h
    update = self.W_z(x)
    candidate = self.W(x)
    if prev is not None:
        reset = hard_sigmoid.hard_sigmoid(self.W_r(x) + self.U_r(prev))
        update += self.U_z(prev)
        # Where the reset gate is applied may differ between versions.
        candidate += self.U(reset * prev)

    update = hard_sigmoid.hard_sigmoid(update)
    candidate = tanh.tanh(candidate)

    if prev is None:
        self.h = (1 - update) * candidate
    else:
        # Argument order is (z, previous state, candidate), the reverse
        # of (z, candidate, previous state).
        self.h = linear_interpolate.linear_interpolate(
            update, prev, candidate)
    return self.h
def _call_mgu(self, h, x):
    """Compute one step of a single-gate recurrent unit.

    The gate ``f`` both masks ``h`` when forming the candidate and blends
    the candidate with ``h``.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    gate_in = concat.concat([h, x])
    gate = sigmoid.sigmoid(self.W_f(gate_in))
    candidate_in = concat.concat([gate * h, x])
    candidate = tanh.tanh(self.W_h(candidate_in))
    return linear_interpolate.linear_interpolate(gate, candidate, h)
def __call__(self, h, x):
    """Compute one stateless GRU step.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    reset = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
    update = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
    # The reset gate masks h before it enters the candidate.
    candidate = tanh.tanh(self.W(x) + self.U(reset * h))
    return linear_interpolate.linear_interpolate(update, candidate, h)
def __call__(self, h, x):
    """Compute one stateless GRU step.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    reset = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
    update = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
    # The reset gate masks h before it enters the candidate.
    candidate = tanh.tanh(self.W(x) + self.U(reset * h))
    # update == 0 keeps h, update == 1 replaces it by the candidate.
    return (1 - update) * h + update * candidate
def _one_directional_loop(di):
    """Unroll one GRU layer with an attention step after every GRU update.

    This is a closure: ``xs_next``, ``h0``, ``layer``, ``direction``,
    ``dropout_ratio``, the weight/bias lists ``xws``/``xbs``/``hws``/``hbs``,
    the projections ``W2``/``B2``/``W3``/``B3`` and the attention sources
    ``phi_ht``/``ht`` all come from the enclosing scope.

    Args:
        di: 0 for the forward GRU, 1 for the backward GRU.

    Returns:
        tuple: Four lists with one entry per time step: the attended
        states ``h``, the raw GRU states ``h_bar``, the context vectors
        ``c_s`` and the argmax attention positions ``z_s``.
    """
    # di=0, forward GRU
    # di=1, backward GRU
    xs_list = xs_next if di == 0 else reversed(xs_next)
    layer_idx = direction * layer + di
    h = h0[layer_idx]
    # h:d_bar_s_1
    # h_bar:d_s
    ''' print(len(xs_list)) print(len(xs_list[0])) print(len(xs_list[0][0])) '''
    h_list = []
    h_bar_list = []
    c_s_list = []
    z_s_list = []
    for x in xs_list:
        batch = x.shape[0]
        # When this step has fewer rows than h, only the leading rows are
        # updated; the remaining rows are kept aside in h_rest.
        if h.shape[0] > batch:
            h, h_rest = split_axis.split_axis(h, [batch], axis=0)
        else:
            h_rest = None

        if layer > 0:
            x = dropout.dropout(x, ratio=dropout_ratio)

        # Standard GRU update producing h_bar from x and the previous h.
        gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
        gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

        W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
        U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

        r = sigmoid.sigmoid(W_r_x + U_r_h)
        z = sigmoid.sigmoid(W_z_x + U_z_h)
        h_bar = tanh.tanh(W_x + r * U_x)
        h_bar = (1 - z) * h_bar + z * h

        # Project the GRU state; each row is used as a query below.
        phi_d = linear.linear(h_bar, W2, B2)
        ''' print(type(phi_ht), len(phi_ht)) print(type(phi_ht[0]), len(phi_ht[0])) print(type(phi_ht[0][0]), len(phi_ht[0][0])) print(type(phi_d), len(phi_d)) print(type(phi_d[0]), len(phi_d[0]), phi_d[0].shape) '''
        #phi_ht_len = [t.shape[1] for t in phi_ht]
        #phi_ht_section = np.cumsum(phi_ht_len[:-1])
        #concat_phi_ht = F.concat(phi_ht, axis=1)
        #concat_phi_d = [F.concat([phi_d[i]]*phi_ht_len[i], axis=0) for i in range(batch)]
        #concat_phi_d = F.concat(concat_phi_d, axis=0)
        #concat_phi_d = F.concat(F.transpose(phi_d), axis=0)

        # Per-example scores: each entry of phi_ht is multiplied with the
        # matching row of phi_d (used as a 1-row weight matrix) and
        # flattened to a vector with one score per row of that entry.
        u_st = list(
            map(
                lambda x, y: reshape.reshape((linear.linear(
                    x, reshape.reshape(y, (1, len(y))))), (len(x), )),
                phi_ht, phi_d))  #(4)

        # Scores are normalised by dividing by their plain sum (no
        # exponential is applied here).
        sum_u = list(map(F.sum, u_st))
        alpha_st = list(
            map(lambda x, y: x / F.broadcast_to(y, x.shape), u_st,
                sum_u))  #(3)
        # Position with the largest weight per example, gathered into a
        # single 1-D array.
        z_s = list(map(F.argmax, alpha_st))
        z_s = list(map(lambda x: F.broadcast_to(x, (1, )), z_s))
        z_s = F.concat(z_s, axis=0)
        ''' print(type(alpha_st),len(alpha_st)) print(type(alpha_st[0]),len(alpha_st[0])) print(alpha_st[0].shape) print(ht[0].shape) '''
        # Context vector per example: rows of the matching ht entry
        # weighted by alpha and summed over axis 0.
        c_s = list(
            map(
                lambda x, y:
                F.sum(F.broadcast_to(
                    reshape.reshape(x, (x.shape[0], 1)), y.shape) * y,
                      axis=0),
                alpha_st, ht))  #(2)
        # Stack the per-example context vectors into one 2-D array; this
        # is done twice, once via reshape and once via broadcast_to.
        c_s_2d = list(
            map(lambda x: reshape.reshape(x, (1, len(x))), c_s))
        concat_c_s = F.concat(c_s_2d, axis=0)
        c_s = list(
            map(lambda x: F.broadcast_to(x, (1, len(x))), c_s))
        c_s = F.concat(c_s, axis=0)
        ''' print(type(c_s), len(c_s)) print(type(c_s[0]), len(c_s[0]), c_s[0].shape) '''
        # The attended state combines the context with the GRU state and
        # becomes the recurrent input of the next step.
        h = F.relu(
            linear.linear(F.concat([concat_c_s, h_bar], axis=1), W3, B3))
        h_list.append(h)
        h_bar_list.append(h_bar)
        c_s_list.append(c_s)
        z_s_list.append(z_s)
        # Account for sequences with differing numbers of words.
        if h_rest is not None:
            h = concat.concat([h, h_rest], axis=0)
            # NOTE(review): this h_bar is overwritten at the start of the
            # next iteration and is not returned, so the concat appears to
            # have no effect -- confirm.
            h_bar = concat.concat([h_bar, h_rest], axis=0)
    return h_list, h_bar_list, c_s_list, z_s_list
def compute_output(z_x, z_h, h_x, h, hh):
    """Finish a GRU step from precomputed pre-activation terms.

    Args:
        z_x: Input contribution to the update gate.
        z_h: Hidden-state contribution to the update gate.
        h_x: Input contribution to the candidate.
        h: Previous hidden state.
        hh: Hidden-state contribution to the candidate.

    Returns:
        The new hidden state.
    """
    update = sigmoid.sigmoid(z_x + z_h)
    candidate = tanh.tanh(h_x + hh)
    # update == 0 keeps h, update == 1 replaces it by the candidate.
    return (1 - update) * h + update * candidate
def forward(self, h, x):
    """Compute one stateless GRU step.

    Args:
        h: Previous hidden state.
        x: Input for the current step.

    Returns:
        The new hidden state.
    """
    reset_gate = sigmoid.sigmoid(self.W_r(x) + self.U_r(h))
    update_gate = sigmoid.sigmoid(self.W_z(x) + self.U_z(h))
    # The reset gate masks h before it enters the candidate.
    gated_h = reset_gate * h
    proposal = tanh.tanh(self.W(x) + self.U(gated_h))
    return linear_interpolate.linear_interpolate(update_gate, proposal, h)