Example #1
def lstm(_inputs, initial_state_h, initial_state_c, *parameters):
    # _inputs: a list with length num_steps,
    # corresponding element: batch_size * input_dim matrix

    H = initial_state_h  # hidden state
    C = initial_state_c  # memory cell

    [W_xi, W_hi, b_i,
     W_xf, W_hf, b_f,
     W_xo, W_ho, b_o,
     W_xc, W_hc, b_c,
     W_hy, b_y] = parameters

    _outputs = []

    for X in _inputs:
        # compute INPUT gate from input and last/initial hidden state
        input_gate = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        # compute FORGET gate from input and last/initial hidden state
        forget_gate = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        # compute OUTPUT gate from input and last/initial hidden state
        output_gate = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        # compute memory cell candidate from input and last/initial hidden state
        memory_cell_candidate = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        # compute memory cell from last memory cell and memory cell candidate
        C = forget_gate * C + input_gate * memory_cell_candidate
        # compute hidden state from output gate and memory cell
        H = output_gate * nd.tanh(C)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)

    return _outputs, H, C
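A minimal usage sketch for the function above (added here, not part of the original example); it assumes from mxnet import nd is in scope, and the dimensions, step count, and 0.01 initialization scale are illustrative placeholders.

from mxnet import nd

def _normal(shape):
    # small random initialization, scale chosen only for illustration
    return nd.random.normal(scale=0.01, shape=shape)

batch_size, input_dim, hidden_dim, output_dim, num_steps = 4, 10, 16, 10, 5
params = [
    _normal((input_dim, hidden_dim)), _normal((hidden_dim, hidden_dim)), nd.zeros(hidden_dim),  # input gate
    _normal((input_dim, hidden_dim)), _normal((hidden_dim, hidden_dim)), nd.zeros(hidden_dim),  # forget gate
    _normal((input_dim, hidden_dim)), _normal((hidden_dim, hidden_dim)), nd.zeros(hidden_dim),  # output gate
    _normal((input_dim, hidden_dim)), _normal((hidden_dim, hidden_dim)), nd.zeros(hidden_dim),  # cell candidate
    _normal((hidden_dim, output_dim)), nd.zeros(output_dim),                                    # output layer
]
inputs = [nd.random.normal(shape=(batch_size, input_dim)) for _ in range(num_steps)]
H0 = nd.zeros((batch_size, hidden_dim))
C0 = nd.zeros((batch_size, hidden_dim))
outputs, H, C = lstm(inputs, H0, C0, *params)
print(len(outputs), outputs[0].shape)  # num_steps, (batch_size, output_dim)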
Example #2
def lstm(inputs, state, params):
    # inputs and outputs are both lists of num_steps matrices of shape (batch_size, vocab_size)
    [
        W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c,
        W_xi2, W_hi2, b_i2, W_xf2, W_hf2, b_f2, W_xo2, W_ho2, b_o2, W_xc2,
        W_hc2, b_c2, W_hq, b_q
    ] = params
    (H1, C1), (H2, C2) = state  # hidden state and memory cell of each of the two layers
    outputs = []
    for X in inputs:
        # first LSTM layer
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H1, W_hi) + b_i)
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H1, W_hf) + b_f)
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H1, W_ho) + b_o)
        C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H1, W_hc) + b_c)
        C1 = F * C1 + I * C_tilda
        H1 = C1.tanh() * O

        # second LSTM layer takes the first layer's output as its input
        I2 = nd.sigmoid(nd.dot(H1, W_xi2) + nd.dot(H2, W_hi2) + b_i2)
        F2 = nd.sigmoid(nd.dot(H1, W_xf2) + nd.dot(H2, W_hf2) + b_f2)
        O2 = nd.sigmoid(nd.dot(H1, W_xo2) + nd.dot(H2, W_ho2) + b_o2)
        C_tilda2 = nd.tanh(nd.dot(H1, W_xc2) + nd.dot(H2, W_hc2) + b_c2)
        C2 = F2 * C2 + I2 * C_tilda2
        H2 = C2.tanh() * O2

        Y = nd.dot(H2, W_hq) + b_q
        outputs.append(Y)
    return outputs, ((H1, C1), (H2, C2))
Example #3
    def nodeforward(self, x, cs, hs, ctx):
        # x: input vector for the current node; cs, hs: memory cells and
        # hidden states of its child nodes (tree-LSTM with per-child U matrices)
        x = nd.reshape(x, (self.dim_h, ))
        # accumulators for the children's contributions to the input, output
        # and candidate (u) gates, plus one forget-gate term per child
        _Ui = nd.zeros((self.dim_h, ), ctx=ctx)
        _Uo = nd.zeros((self.dim_h, ), ctx=ctx)
        _Uu = nd.zeros((self.dim_h, ), ctx=ctx)
        _Uf = [nd.zeros((self.dim_h, ), ctx=ctx) for i in range(len(cs))]

        for idx in range(len(cs)):
            _Ui = nd.add(_Ui, nd.dot(self.Uis[idx].data(), hs[idx]))
            _Uo = nd.add(_Uo, nd.dot(self.Uos[idx].data(), hs[idx]))
            _Uu = nd.add(_Uu, nd.dot(self.Uus[idx].data(), hs[idx]))
            for j in range(len(cs)):
                _Uf[idx] = nd.add(_Uf[idx],
                                  nd.dot(self.Ufs[idx][j].data(), hs[j]))

        i = nd.sigmoid(
            nd.add(nd.add(nd.dot(self.Wi.data(), x), _Ui), self.bi.data()))
        o = nd.sigmoid(
            nd.add(nd.add(nd.dot(self.Wo.data(), x), _Uo), self.bo.data()))
        f = [
            nd.sigmoid(
                nd.add(nd.add(nd.dot(self.Wf.data(), x), _Uf[idx]),
                       self.bf.data())) for idx in range(len(cs))
        ]
        u = nd.tanh(
            nd.add(nd.add(nd.dot(self.Wu.data(), x), _Uu), self.bu.data()))

        c = nd.zeros((self.dim_h, ), ctx=ctx)
        for idx in range(len(cs)):
            c = nd.add(c, nd.multiply(f[idx], cs[idx]))
        c = nd.add(nd.multiply(i, u), c)

        h = nd.multiply(o, nd.tanh(c))
        return c, h
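For reference (added, not from the project source), the gate structure above corresponds to an N-ary tree-LSTM style node update: the i, o, and u gates each sum one U matrix per child over that child's hidden state, and there is one forget gate per child that looks at all children. Sketched with k and j indexing children:

i = \sigma\big(W_i x + \sum_k U_i^{(k)} h_k + b_i\big)
o = \sigma\big(W_o x + \sum_k U_o^{(k)} h_k + b_o\big)
u = \tanh\big(W_u x + \sum_k U_u^{(k)} h_k + b_u\big)
f_k = \sigma\big(W_f x + \sum_j U_f^{(k,j)} h_j + b_f\big)
c = i \odot u + \sum_k f_k \odot c_k
h = o \odot \tanh(c)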
Example #4
    def forward(self, input_data):
        freq = input_data[:, 0:2].expand_dims(1)
        input_data = input_data[:, 2:]
        e1_vec_start = FIXED_WORD_LENGTH * DIMENSION
        x = input_data[:, :e1_vec_start].reshape(
            (input_data.shape[0], FIXED_WORD_LENGTH,
             DIMENSION))  # (m, 60, 110)

        e1neimask = input_data[:, e1_vec_start:e1_vec_start +
                               MASK_LENGTH]  # (m, 51)
        e1edge = input_data[:, e1_vec_start + MASK_LENGTH:e1_vec_start +
                            MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH].reshape(
                                (input_data.shape[0], ENTITY_DEGREE,
                                 WORD_DIMENSION * 2))  # (m, 51, 200)
        e1neigh = e1edge[:, :, :WORD_DIMENSION]

        e2_vec_start = e1_vec_start + MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH
        e2neimask = input_data[:, e2_vec_start:e2_vec_start +
                               MASK_LENGTH]  # (m, 51)
        e2edge = input_data[:, e2_vec_start + MASK_LENGTH:e2_vec_start +
                            MASK_LENGTH + ENTITY_EDGE_VEC_LENGTH].reshape(
                                (input_data.shape[0], ENTITY_DEGREE,
                                 WORD_DIMENSION * 2))  # (m, 51,200)
        e2neigh = e2edge[:, :, :WORD_DIMENSION]

        gru = self.gru
        x = nd.transpose(x, axes=(1, 0, 2))
        h = gru(x)
        ht = nd.transpose(h, axes=(1, 0, 2))
        gru_out = self.gru_out
        y1 = gru_out(ht.expand_dims(1))  # (m,200)

        att = self.center_att
        e1edge = nd.tanh(e1edge)
        e1g = att(e1edge) * freq[:, :, :1]  # (m,51,1)
        e1g = e1g * e1neimask.expand_dims(2)
        e1g = nd.softmax(e1g, axis=1)
        e1gt = nd.transpose(e1g, axes=(0, 2, 1))  # (m,1,51)
        e1n = nd.batch_dot(e1gt, e1neigh)  # (m,1,100)
        e1n = e1n.reshape((e1n.shape[0], 100))  # (m,100)

        e2edge = nd.tanh(e2edge)
        e2g = att(e2edge) * freq[:, :, 1:]  # (m,51,1)
        e2g = e2g * e2neimask.expand_dims(2)
        e2g = nd.softmax(e2g, axis=1)
        e2gt = nd.transpose(e2g, axes=(0, 2, 1))  # (m,1,51)
        e2n = nd.batch_dot(e2gt, e2neigh)  # (m,1,100)
        e2n = e2n.reshape((e2n.shape[0], 100))  # (m,100)

        center_y = nd.concat(e1n, e2n, dim=1)  # (m,200)
        center_out = self.center_out
        center_y = center_out(center_y)

        out = self.output
        y4 = nd.concat(y1, center_y, dim=1)
        y5 = out(y4)
        return y5
Example #5
    def forward(self, x):
        if self.dependent_G:
            g = nd.sigmoid(nd.dot(x, self.G.data()))
        else:
            g = nd.sigmoid(self.G.data())

        W0 = nd.tanh(self.W0_hat.data()) * nd.sigmoid(self.M0_hat.data())
        W1 = nd.tanh(self.W1_hat.data()) * nd.sigmoid(self.M1_hat.data())
        a = nd.dot(x, W0)
        m = nd.exp(nd.dot(nd.log(nd.abs(x) + 1e-10), W1))
        y = g * a + (1 - g) * m

        return y
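This forward pass resembles a NALU (neural arithmetic logic unit) cell; the sketch below is added for orientation rather than taken from the source. The weight matrices are constrained to (-1, 1) by a tanh-times-sigmoid parameterization, a is an additive (linear) path, m multiplies its inputs by working in log space, and the learned gate g interpolates between the two.

W_j = \tanh(\hat{W}_j) \odot \sigma(\hat{M}_j), \quad j \in \{0, 1\}
a = x W_0, \qquad m = \exp\big(\log(|x| + \epsilon)\, W_1\big), \qquad y = g \odot a + (1 - g) \odot m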
Example #6
def lstm(inputs, state, params):
    W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q = params
    H, C = state
    outputs = []
    for X in inputs:
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        C_ = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        C = F * C + I * C_
        H = O * nd.tanh(C)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, C)
Example #7
        def squash_policy(mu, pi, logp_pi):
            def clip_pass_gradient(x, l=-1., u=1.):
                clip_up = nd.cast(x > u, "float32")
                clip_low = nd.cast(x < l, "float32")
                return x + nd.stop_gradient((u - x) * clip_up +
                                            (l - x) * clip_low)

            mu = nd.tanh(mu)
            pi = nd.tanh(pi)
            # avoid machine precision error, clip 1-pi**2 to [0, 1]
            logp_pi = logp_pi - nd.sum(
                nd.log(clip_pass_gradient(1 - nd.square(pi), l=0, u=1) + 1e-6),
                axis=1)
            return mu, pi, logp_pi
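The term subtracted from logp_pi is the standard change-of-variables correction for squashing a Gaussian sample u through tanh (a sketch added here, not from the source); with a = tanh(u):

\log \pi(a \mid s) = \log \mu(u \mid s) - \sum_i \log\big(1 - \tanh^2(u_i)\big)

The code clips 1 - pi**2 into [0, 1], passing gradients straight through outside that range, and adds 1e-6 before the log for numerical stability.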
Example #8
def gru(_inputs, initial_state, *parameters):
    # _inputs: a list with length num_steps,
    # corresponding element: batch_size * input_dim matrix

    H = initial_state

    [W_xz, W_hz, b_z,
     W_xr, W_hr, b_r,
     W_xh, W_hh, b_h,
     W_hy, b_y] = parameters

    _outputs = []

    for X in _inputs:
        # compute update gate from input and last/initial hidden state
        update_gate = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)
        # compute reset gate from input and last/initial hidden state
        reset_gate = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)
        # compute candidate hidden state from input, reset gate and last/initial hidden state
        H_candidate = nd.tanh(nd.dot(X, W_xh) + reset_gate * nd.dot(H, W_hh) + b_h)
        # compute hidden state from candidate hidden state and last hidden state
        H = update_gate * H + (1 - update_gate) * H_candidate
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)

    return _outputs, H
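For reference (added, not from the source), the loop above computes the GRU update below; note that this variant applies the reset gate to the product H_{t-1} W_{hh} rather than to H_{t-1} before the matrix multiplication.

Z_t = \sigma(X_t W_{xz} + H_{t-1} W_{hz} + b_z)
R_t = \sigma(X_t W_{xr} + H_{t-1} W_{hr} + b_r)
\tilde{H}_t = \tanh\big(X_t W_{xh} + R_t \odot (H_{t-1} W_{hh}) + b_h\big)
H_t = Z_t \odot H_{t-1} + (1 - Z_t) \odot \tilde{H}_t
Y_t = H_t W_{hy} + b_y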
Example #9
def rnn(inputs, h, w_xh, w_hh, b_h, w_hy, b_y):
    output = []
    for x in inputs:
        h = nd.tanh(nd.dot(x, w_xh) + nd.dot(h, w_hh) + b_h)
        y = nd.dot(h, w_hy) + b_y
        output.append(y)
    return (output, h)
Example #10
    def forward_single(self, feature, data, begin_state):
        """ unroll one step

        Parameters
        ----------
        feature: a NDArray with shape [n, d].
        data: a NDArray with shape [n, b, d].        
        begin_state: a NDArray with shape [n, b, d].
        
        Returns
        -------
        output: ouptut of the cell, which is a NDArray with shape [n, b, d]
        states: a list of hidden states (list of hidden units with shape [n, b, d]) of RNNs.
        
        """
        if begin_state is None:
            num_nodes, batch_size, _ = data.shape
            begin_state = [nd.zeros((num_nodes, batch_size, self.hidden_size), ctx=feature.context)]

        prev_state = begin_state[0]
        data_and_state = nd.concat(data, prev_state, dim=-1)
        z = nd.sigmoid(self.dense_z(feature, data_and_state))
        r = nd.sigmoid(self.dense_r(feature, data_and_state))

        state = z * prev_state + (1 - z) * nd.tanh(self.dense_i2h(feature, data) + self.dense_h2h(feature, r * prev_state))
        return state, [state]
Example #11
 def forward(self, x):
     x = nd.pick(x,
                 nd.broadcast_to(self._dim.data(), x.shape[0]),
                 keepdims=True)
     x -= self._split.data()
     x *= nd.relu(self._sharpness.data())
     return nd.tanh(x)
Example #12
    def forward(self, query, values, head=False):
        """

        计算Attention权重与输出向量

        :param query: 查询,即当前步Decoder的输入
        :param values: 值,即Encoder中每一个时间步向量
        :return: (Attention输出向量, Attention权重)
        """
        #print('In Attention')
        hidden_with_time_axis = nd.expand_dims(query, 1)
        #print('hidden_with_time:', hidden_with_time_axis.shape)
        score = self.V(
            nd.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        #print('\t score:',score.shape)
        attention_weights = nd.softmax(score, axis=1)

        #print('\t attention_weight:', attention_weights.shape)
        #print('\t values:', values.shape)

        context_vector = attention_weights * values

        #print('\t mid_context_vector:',context_vector.shape)

        if head is True:
            context_vector = nd.sum(context_vector, axis=2)
        else:
            context_vector = nd.sum(context_vector, axis=1)
        # print('\t context',context_vector.shape)
        context_vector = nd.expand_dims(context_vector, axis=0)
        return context_vector, attention_weights
Example #13
    def forward(self, current, previous, doc_encode):
        """[summary]

        Args:
            current ([type]): h_j (batch_size, sentence_hidden_size * 2)
            previous ([type]): s_j (batch_size, sentence_hidden_size * 2)
            doc_encode ([type]): d (batch_size, ndoc_dims)
        """
        # content: (batch_size, 1)
        content = self.content_encoder(current)
        # salience: (batch_size, sentence_hidden_size * 2)
        salience = self.salience_encoder(doc_encode)
        salience = current * salience
        # salience: (batch_size,)
        salience = nd.sum_axis(salience, -1)
        # salience: (batch_size, 1)
        salience = nd.expand_dims(salience, -1)

        # novelty: (batch_size, sentence_hidden_size * 2)
        novelty = self.novelty_encoder(nd.tanh(previous))
        novelty = current * novelty
        # novelty: (batch_size,)
        novelty = nd.sum_axis(novelty, -1)
        # novelty: (batch_size, 1)
        novelty = nd.expand_dims(novelty, -1)

        # P: (batch_size, 1)
        P = nd.sigmoid(content + salience - novelty)

        return P
Example #14
File: LSTM.py Project: JasonPin/Lyric
def lstm_rnn(inputs, state_h, state_c, *params):
    '''

    :param inputs: the input at each time step
    :param state_h: the hidden state (output) from the previous time step
    :param state_c: the cell state from the previous time step
    :param params: the parameter list
    :return: the outputs
    Input gate:      It = σ(Xt*Wxi + Ht-1*Whi + bi)
    Forget gate:     Ft = σ(Xt*Wxf + Ht-1*Whf + bf)
    Output gate:     Ot = σ(Xt*Wxo + Ht-1*Who + bo)
    Candidate cell:  C~t = tanh(Xt*Wxc + Ht-1*Whc + bc)
    Output:          Yt = Ht*Why + by
    '''
    [
        W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c,
        W_hy, b_y
    ] = params
    H = state_h  # previous hidden state; combined with the input to form the gates
    C = state_c  # memory cell recording the state at this step, passed to the next step
    outputs = []
    for X in inputs:
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)  # input gate
        C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) +
                          b_c)  # candidate cell: how much new information may enter the memory
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) +
                       b_f)  # forget gate: how much of the previous memory is kept
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)  # output gate
        C = F * C + C_tilda * I  # updated memory cell, passed on to the next step
        H = O * C.tanh()
        Y = nd.dot(H, W_hy) + b_y  # output at the current step
        outputs.append(Y)
        outputs.append(Y)
    return (outputs, H, C)
Example #15
def rnn(inputs, state, params):
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, )
Example #16
    def lstm(self, inputs, state):
        [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c,
         W_hq, b_q] = self.params
        (H, C) = state
        outputs = []

        for X in inputs:
            I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
            F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
            O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
            C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)

            C = I * C_tilda + F * C
            H = nd.tanh(C) * O
            Y = nd.dot(H, W_hq) + b_q
            outputs.append(Y)

        return outputs, (H, C)
Example #17
def rnn(inputs, state, *params):
    H = state
    W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return outputs, H
Example #18
    def forward(self, x, crisp=False):
        pick_index = nd.broadcast_to(self._dim.data(), x.shape[0])
        x = nd.pick(x, pick_index, keepdims=True)
        x = x - self._split.data()
        if not crisp:
            x = x * nd.relu(self._sharpness.data()) * self._gate()
            # x = x * nd.relu(self._sharpness.data())

        return nd.tanh(x)
Example #19
def rnn(inputs, H, W_xh, W_hh, b_h, W_hy, b_y):
    # inputs: num_steps matrices of shape batch_size * vocab_size
    # H: matrix of shape batch_size * hidden_size
    # outputs: num_steps matrices of shape batch_size * vocab_size
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return outputs, H
Example #20
def rnn(inputs, state, params):
    # inputs and outputs are both num_steps (time steps) matrices of shape (batch_size, vocab_size)
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state  # initial hidden state H
    outputs = []
    for X in inputs:  # one batch of characters per time step
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)  # update H from the current input and the previous H
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H,)  # return the per-step outputs and the final H
Example #21
File: lstm.py Project: HaoranYi/gitProj
def lstm_rnn(inputs, h, c, temperature=1.0):
    outputs = []
    for X in inputs:
        g = nd.tanh(nd.dot(X, Wxg) + nd.dot(h, Whg) + bg)
        i = nd.sigmoid(nd.dot(X, Wxi) + nd.dot(h, Whi) + bi)
        f = nd.sigmoid(nd.dot(X, Wxf) + nd.dot(h, Whf) + bf)
        o = nd.sigmoid(nd.dot(X, Wxo) + nd.dot(h, Who) + bo)
        # update the memory cell and hidden state
        c = f * c + i * g
        h = o * nd.tanh(c)
        # map the hidden state to output probabilities with a temperature-scaled softmax
        yhat_linear = nd.dot(h, Why) + by
        yhat = softmax(yhat_linear, temperature=temperature)
        outputs.append(yhat)
    return (outputs, h, c)
Example #22
 def forward(self, input_data):
     x = nd.transpose(input_data, axes=(1, 0, 2))
     h = nd.transpose(self.gru(x), axes=(1, 0, 2))  # (m,60,100)
     h = nd.tanh(h)
     g = self.att(h)  # (m,60,1)
     g = nd.softmax(g, axis=1)
     gt = nd.transpose(g, axes=(0, 2, 1))  # (m,1,60)
     n = nd.batch_dot(gt, h)
     y = self.att_out(n)
     return self.output(y)
Example #23
def rnn(inputs, state, params):
    # inputs and output are both num_steps matrices of shape (batch_size, vocab_size)
    output = []
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state  # only the initial H is given (see above); shape (batch_size, num_hiddens)
    for X in inputs:  # loop over the num_steps inputs
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)  # compute the hidden state, returned at the end
        Y = nd.dot(H, W_hq) + b_q
        output.append(Y)  # append
    return output, (H, )
Example #24
def lstm_rnn(inputs, h, c, temperature=1.0):
    outputs = []
    for X in inputs:
        g = nd.tanh(nd.dot(X, Wxg) + nd.dot(h, Whg) + bg)
        i = nd.sigmoid(nd.dot(X, Wxi) + nd.dot(h, Whi) + bi)
        f = nd.sigmoid(nd.dot(X, Wxf) + nd.dot(h, Whf) + bf)
        o = nd.sigmoid(nd.dot(X, Wxo) + nd.dot(h, Who) + bo)
        # update the memory cell and hidden state
        c = f * c + i * g
        h = o * nd.tanh(c)
        # map the hidden state to output probabilities with a temperature-scaled softmax
        yhat_linear = nd.dot(h, Why) + by
        yhat = softmax(yhat_linear, temperature=temperature)
        outputs.append(yhat)
    return (outputs, h, c)
Example #25
def rnn(inputs, state, params):
    # inputs and outputs are both num_steps matrices of shape (batch_size, vocab_size)
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H,)
Example #26
def rnn(inputs, state, params):
    w_xh, w_hh, b_h, w_ho, b_q = params
    H, = state
    outputs = []
    # inputs and outputs are both num_steps matrices of shape (batch_size, vocab_size)
    for X in inputs:
        H = nd.tanh(nd.dot(X, w_xh) + nd.dot(H, w_hh) + b_h)
        Y = nd.dot(H, w_ho) + b_q
        outputs.append(Y)
    return outputs, (H,)
Example #27
def simple_rnn(inputs, state, temperature=1.0):
    outputs = []
    h = state
    for X in inputs:
        h_linear = nd.dot(X, Wxh) + nd.dot(h, Whh) + bh
        h = nd.tanh(h_linear)
        yhat_linear = nd.dot(h, Why) + by
        yhat = softmax(yhat_linear, temperature=temperature)
        outputs.append(yhat)
    return (outputs, h)
Example #28
File: rnn.py Project: HaoranYi/gitProj
def simple_rnn(inputs, state, temperature=1.0):
    outputs = []
    h = state
    for X in inputs:
        h_linear = nd.dot(X, Wxh) + nd.dot(h, Whh) + bh
        h = nd.tanh(h_linear)
        yhat_linear = nd.dot(h, Why) + by
        yhat = softmax(yhat_linear, temperature=temperature)
        outputs.append(yhat)
    return (outputs, h)
Example #29
def rnn(inputs, state, params):
    # inputs and outputs are num_steps matrices of shape (batch_size, vocab_size)
    # use tanh as the activation function
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, )
Example #30
File: rnn.py Project: flownclouds/d2lzh
    def rnn(self):
        W_xh, W_hh, b_h, W_hq, b_q = self.params
        H, = self.state
        outputs = []

        for X in self.inputs:
            H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
            Y = nd.dot(H, W_hq) + b_q
            outputs.append(Y)

        return outputs, (H,)
Example #31
def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H = state
    outputs = []
    for X in inputs:
        Z = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)
        R = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)
        H_ = nd.tanh(nd.dot(X, W_xh) + R * nd.dot(H, W_hh) + b_h)
        H = Z * H + (1 - Z) * H_
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, H
Example #32
def rnn(inputs, state, *params):
    # inputs: num_steps matrices of shape batch_size * vocab_size.
    # H: matrix of shape batch_size * hidden_dim.
    # outputs: num_steps matrices of shape batch_size * vocab_size.
    H = state
    W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H)
Example #33
 def check_tanh():
     x = create_input_for_trigonometric_ops(
         [-1 / 4, -1 / 2, 0, 1 / 4, 1 / 2])
     y = nd.tanh(x)
     # expected output for indices=(0, 1, -3, -2, -1) after applying tanh()
     expected_output = [
         np.tanh(-1 / 4),
         np.tanh(-1 / 2), 0,
         np.tanh(1 / 4),
         np.tanh(1 / 2)
     ]
     assert_correctness_of_trigonometric_ops(y, expected_output)
Example #34
def rnn(inputs, state, params):
    W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    #print(H.shape)  # dimension of the hidden state
    #print(len(inputs), len(inputs[0]), len(inputs[0][0]), len(inputs[0][0][0]))  # the corpus is split into pieces used across multiple training passes
    outputs = []
    for X in inputs:
        #print(X.shape)  # number of samples passed through the network at a time
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, )
Example #35
File: gru.py Project: HaoranYi/gitProj
def gru_rnn(inputs, h, temperature=1.0):
    outputs = []
    for X in inputs:
        z = nd.sigmoid(nd.dot(X, Wxz) + nd.dot(h, Whz) + bz)
        r = nd.sigmoid(nd.dot(X, Wxr) + nd.dot(h, Whr) + br)
        g = nd.tanh(nd.dot(X, Wxh) + nd.dot(r * h, Whh) + bh)
        h = z * h + (1 - z) * g

        yhat_linear = nd.dot(h, Why) + by
        yhat = softmax(yhat_linear, temperature=temperature)
        outputs.append(yhat)
    return (outputs, h)
Example #36
File: base.py Project: chr5tphr/ecGAN
 def forward(self, x):
     return nd.tanh(x)
Example #37
File: func.py Project: chr5tphr/ecGAN
 def forward(self, x):
     y = nd.tanh(x)
     self.save_for_backward(x, y)
     return y