Example #1
def rnn_cell_forward(xt, a_prev, parameters):
    """
    实现RNN单元的单步前向传播
    参数:
        xt -- 时间步“t”输入的数据,维度为(n_x, m)
        a_prev -- 时间步“t - 1”的隐藏隐藏状态,维度为(n_a, m)
        parameters -- 字典,包含了以下内容:
                        Wax -- 矩阵,输入乘以权重,维度为(n_a, n_x)
                        Waa -- 矩阵,隐藏状态乘以权重,维度为(n_a, n_a)
                        Wya -- 矩阵,隐藏状态与输出相关的权重矩阵,维度为(n_y, n_a)
                        ba  -- 偏置,维度为(n_a, 1)
                        by  -- 偏置,隐藏状态与输出相关的偏置,维度为(n_y, 1)

    返回:
        a_t -- 下一个隐藏状态,维度为(n_a, m)
        yt_pred -- 在时间步“t”的预测,维度为(n_y, m)
        cache -- 反向传播需要的元组,包含了(a_next, a_prev, xt, parameters)
    """

    # Retrieve parameters from "parameters"
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    a_t = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)

    y = rnn_utils.softmax(np.dot(Wya, a_t) + by)

    cache = (a_t, a_prev, xt, parameters)
    return a_t, y, cache
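
Example #1 calls rnn_utils.softmax (and the LSTM examples further down call rnn_utils.sigmoid) without showing those helpers. A minimal sketch of what they typically look like, assuming the inputs are numpy arrays with the class dimension on axis 0; the actual rnn_utils module may differ:

import numpy as np

def softmax(x):
    # Column-wise softmax; subtracting the per-column max keeps exp() numerically stable.
    e_x = np.exp(x - np.max(x, axis=0, keepdims=True))
    return e_x / np.sum(e_x, axis=0, keepdims=True)

def sigmoid(x):
    # Element-wise logistic function.
    return 1.0 / (1.0 + np.exp(-x))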
Example #2
def rnn_cell_forward(xt, a_prev, parameters):
    """
    Implements a single forward step of the RNN-cell as described in Figure (2)

    Vectorized over 'm' samples.

    Arguments:
    xt -- your input data at timestep "t", numpy array of shape (n_x, m).
    a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary containing:
      Wax -- Weight matrix multiplying the input,
             numpy array of shape (n_a, n_x)
      Waa -- Weight matrix multiplying the hidden state, numpy array of
             shape (n_a, n_a)
      Wya -- Weight matrix relating the hidden-state to the output,
             numpy array of shape (n_y, n_a)
      ba -- Bias, numpy array of shape (n_a, 1)
      by -- Bias relating the hidden-state to the output, numpy array
            of shape (n_y, 1)
    Returns:
    a_next -- next hidden state, of shape (n_a, m)
    yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
    cache -- tuple of values needed for the backward pass, contains
             (a_next, a_prev, xt, parameters)
    """
    # Retrieve parameters from "parameters"
    Wax = parameters["Wax"]
    Waa = parameters["Waa"]
    Wya = parameters["Wya"]
    ba = parameters["ba"]
    by = parameters["by"]

    a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)
    yt_pred = softmax(np.dot(Wya, a_next) + by)

    # store values you need for backward propagation in cache
    cache = (a_next, a_prev, xt, parameters)
    return a_next, yt_pred, cache
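
A quick way to sanity-check the cell is to run it on random data and verify the output shapes. A minimal sketch, assuming numpy is imported as np, a softmax helper like the one sketched above is in scope, and the sizes n_x=3, n_a=5, n_y=2, m=10 are arbitrary:

np.random.seed(1)
n_x, n_a, n_y, m = 3, 5, 2, 10
xt = np.random.randn(n_x, m)
a_prev = np.random.randn(n_a, m)
parameters = {
    "Wax": np.random.randn(n_a, n_x),
    "Waa": np.random.randn(n_a, n_a),
    "Wya": np.random.randn(n_y, n_a),
    "ba": np.random.randn(n_a, 1),
    "by": np.random.randn(n_y, 1),
}
a_next, yt_pred, cache = rnn_cell_forward(xt, a_prev, parameters)
print(a_next.shape)   # (5, 10)
print(yt_pred.shape)  # (2, 10)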
Example #3
def forward_propagation(self, x):

    # The total number of time steps
    T = len(x)

    # During forward propagation we save all hidden states in s, since we need
    # them later. We add one extra row for the initial hidden state, which is set to 0.
    s = np.zeros((T + 1, self.hidden_dim))
    s[-1] = np.zeros(self.hidden_dim)  # s[-1] is the initial hidden state, used as s[t-1] when t == 0

    # The output at each time step, saved for later.
    o = np.zeros((T, self.word_dim))

    # For each time step:
    for t in np.arange(T):
        # s_t = tanh(U x_t + W s_{t-1})
        # Indexing U by x[t] is the same as multiplying U with a one-hot vector:
        # since x[t] is a word index, we just take the corresponding column of U.
        s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t-1]))
        # o_t = softmax(V s_t)
        o[t] = rnn_utils.softmax(self.V.dot(s[t]))
    return [o, s]
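
forward_propagation is written as a method, so it expects self.U, self.V, self.W, self.hidden_dim and self.word_dim to already exist, and it relies on rnn_utils.softmax being importable. A minimal, hypothetical container for trying it out; the class name, sizes and uniform initialization are assumptions, not part of the original project, and numpy is assumed imported as np:

class VanillaRNN:
    def __init__(self, word_dim, hidden_dim):
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        # Small uniform initialization scaled by the fan-in of each matrix (an assumed scheme).
        bound_u = 1.0 / np.sqrt(word_dim)
        bound_w = 1.0 / np.sqrt(hidden_dim)
        self.U = np.random.uniform(-bound_u, bound_u, (hidden_dim, word_dim))
        self.V = np.random.uniform(-bound_w, bound_w, (word_dim, hidden_dim))
        self.W = np.random.uniform(-bound_w, bound_w, (hidden_dim, hidden_dim))

model = VanillaRNN(word_dim=10, hidden_dim=8)
x = [0, 3, 7, 1]                       # a sentence as a list of word indices
o, s = forward_propagation(model, x)   # call the function with the model standing in for `self`
print(o.shape, s.shape)                # (4, 10) (5, 8)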
Example #4
File: RNN.py  Project: TommyZeng/ML
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    根据图4实现一个LSTM单元的前向传播。

    参数:
        xt -- 在时间步“t”输入的数据,维度为(n_x, m)
        a_prev -- 上一个时间步“t-1”的隐藏状态,维度为(n_a, m)
        c_prev -- 上一个时间步“t-1”的记忆状态,维度为(n_a, m)
        parameters -- 字典类型的变量,包含了:
                        Wf -- 遗忘门的权值,维度为(n_a, n_a + n_x)
                        bf -- 遗忘门的偏置,维度为(n_a, 1)
                        Wi -- 更新门的权值,维度为(n_a, n_a + n_x)
                        bi -- 更新门的偏置,维度为(n_a, 1)
                        Wc -- 第一个“tanh”的权值,维度为(n_a, n_a + n_x)
                        bc -- 第一个“tanh”的偏置,维度为(n_a, n_a + n_x)
                        Wo -- 输出门的权值,维度为(n_a, n_a + n_x)
                        bo -- 输出门的偏置,维度为(n_a, 1)
                        Wy -- 隐藏状态与输出相关的权值,维度为(n_y, n_a)
                        by -- 隐藏状态与输出相关的偏置,维度为(n_y, 1)
    返回:
        a_next -- 下一个隐藏状态,维度为(n_a, m)
        c_next -- 下一个记忆状态,维度为(n_a, m)
        yt_pred -- 在时间步“t”的预测,维度为(n_y, m)
        cache -- 包含了反向传播所需要的参数,包含了(a_next, c_next, a_prev, c_prev, xt, parameters)

    注意:
        ft/it/ot表示遗忘/更新/输出门,cct表示候选值(c tilda),c表示记忆值。
    """

    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]

    # Retrieve dimensions from the shapes of xt and Wy
    n_x, m = xt.shape
    n_y, n_a = Wy.shape

    # 1. Concatenate a_prev and xt
    concat = np.zeros([n_a + n_x, m])
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    # 2. Compute ft, it, cct, c_next, ot and a_next using Equations 1-6

    ## Forget gate (Equation 1)
    ft = rnn_utils.sigmoid(np.dot(Wf, concat) + bf)

    ## Update gate (Equation 2)
    it = rnn_utils.sigmoid(np.dot(Wi, concat) + bi)

    ## Candidate value (Equation 3)
    cct = np.tanh(np.dot(Wc, concat) + bc)

    ## Next memory state (Equation 4)
    c_next = ft * c_prev + it * cct

    ## Output gate (Equation 5)
    ot = rnn_utils.sigmoid(np.dot(Wo, concat) + bo)

    ## Next hidden state (Equation 6)
    a_next = ot * np.tanh(c_next)

    # 3. Compute the prediction of the LSTM cell
    yt_pred = rnn_utils.softmax(np.dot(Wy, a_next) + by)

    # Store the values needed for backward propagation in cache
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)

    return a_next, c_next, yt_pred, cache
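
For reference, Equations 1 to 6 in the comments above are the standard LSTM cell equations, written here with ⊙ for element-wise multiplication and [a_prev; xt] for the vertical concatenation built in step 1; this is a paraphrase of what the code computes, not a quote from the cited Figure 4:

\begin{aligned}
\Gamma_f &= \sigma\left(W_f\,[a_{prev};\,x_t] + b_f\right) \\
\Gamma_i &= \sigma\left(W_i\,[a_{prev};\,x_t] + b_i\right) \\
\tilde{c} &= \tanh\left(W_c\,[a_{prev};\,x_t] + b_c\right) \\
c_{next} &= \Gamma_f \odot c_{prev} + \Gamma_i \odot \tilde{c} \\
\Gamma_o &= \sigma\left(W_o\,[a_{prev};\,x_t] + b_o\right) \\
a_{next} &= \Gamma_o \odot \tanh(c_{next}) \\
y_{pred} &= \mathrm{softmax}\left(W_y\,a_{next} + b_y\right)
\end{aligned}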
Example #5
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    根据图4实现一个LSTM单元的前向传播。

    参数:
        xt -- 在时间步“t”输入的数据,维度为(n_x, m)
        a_prev -- 上一个时间步“t-1”的隐藏状态,维度为(n_a, m)
        c_prev -- 上一个时间步“t-1”的记忆状态,维度为(n_a, m)
        parameters -- 字典类型的变量,包含了:
                        Wf -- 遗忘门的权值,维度为(n_a, n_a + n_x)
                        bf -- 遗忘门的偏置,维度为(n_a, 1)
                        Wi -- 更新门的权值,维度为(n_a, n_a + n_x)
                        bi -- 更新门的偏置,维度为(n_a, 1)
                        Wc -- 第一个“tanh”的权值,维度为(n_a, n_a + n_x)
                        bc -- 第一个“tanh”的偏置,维度为(n_a, n_a + n_x)
                        Wo -- 输出门的权值,维度为(n_a, n_a + n_x)
                        bo -- 输出门的偏置,维度为(n_a, 1)
                        Wy -- 隐藏状态与输出相关的权值,维度为(n_y, n_a)
                        by -- 隐藏状态与输出相关的偏置,维度为(n_y, 1)
    返回:
        a_next -- 下一个隐藏状态,维度为(n_a, m)
        c_next -- 下一个记忆状态,维度为(n_a, m)
        yt_pred -- 在时间步“t”的预测,维度为(n_y, m)
        cache -- 包含了反向传播所需要的参数,包含了(a_next, c_next, a_prev, c_prev, xt, parameters)
    """

    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]

    # Concatenate a_prev and xt
    concat = np.vstack((a_prev, xt))

    # Forget gate
    Gf = rnn_utils.sigmoid(np.dot(Wf, concat) + bf)
    # Update gate
    Gi = rnn_utils.sigmoid(np.dot(Wi, concat) + bi)
    # Output gate
    Go = rnn_utils.sigmoid(np.dot(Wo, concat) + bo)

    # Candidate value
    tmp_ct = np.tanh(np.dot(Wc, concat) + bc)

    # Next memory state
    ct = np.multiply(Gi, tmp_ct) + np.multiply(Gf, c_prev)

    # Next hidden state
    a_next = np.multiply(Go, np.tanh(ct))

    # Compute the prediction of the LSTM cell
    y_pre = rnn_utils.softmax(np.dot(Wy, a_next) + by)

    cache = (a_next, ct, a_prev, c_prev, Gf, Gi, tmp_ct, Go, xt, parameters)
    return a_next, ct, y_pre, cache
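
This version builds the concatenation with np.vstack rather than preallocating a zero array and filling its slices as the previous example does; the two approaches give the same array, as a quick check shows (the sizes 5, 3 and 10 are arbitrary):

a_prev = np.random.randn(5, 10)
xt = np.random.randn(3, 10)
c1 = np.vstack((a_prev, xt))
c2 = np.zeros((5 + 3, 10))
c2[:5, :] = a_prev
c2[5:, :] = xt
print(np.allclose(c1, c2))  # True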
Example #6
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    Implement a single forward step of the LSTM-cell as described in Figure (4)

    Arguments:
    xt -- your input data at timestep "t", numpy array of shape (n_x, m).
    a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m)
    c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m)
    parameters -- python dictionary containing:
      Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
      bf -- Bias of the forget gate, numpy array of shape (n_a, 1)
      Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
      bi -- Bias of the update gate, numpy array of shape (n_a, 1)
      Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x)
      bc -- Bias of the first "tanh", numpy array of shape (n_a, 1)
      Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
      bo -- Bias of the output gate, numpy array of shape (n_a, 1)
      Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a)
      by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1)

    Returns:
    a_next -- next hidden state, of shape (n_a, m)
    c_next -- next memory state, of shape (n_a, m)
    yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
    cache -- tuple of values needed for the backward pass,
             contains (a_next, c_next, a_prev, c_prev, xt, parameters)

    Note: ft/it/ot stand for the forget/update/output gates, cct stands for the
          candidate value (c tilde), c stands for the memory value
    """

    # Retrieve parameters from "parameters"
    Wf = parameters["Wf"]
    bf = parameters["bf"]
    Wi = parameters["Wi"]
    bi = parameters["bi"]
    Wc = parameters["Wc"]
    bc = parameters["bc"]
    Wo = parameters["Wo"]
    bo = parameters["bo"]
    Wy = parameters["Wy"]
    by = parameters["by"]

    # Retrieve dimensions from shapes of xt and Wy
    n_x, m = xt.shape
    n_y, n_a = Wy.shape

    # Concatenate a_prev and xt
    concat = np.zeros((n_a + n_x, m))
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    # Compute values for ft, it, cct, c_next, ot, a_next using the formulas
    # given in Figure (4)
    ft = sigmoid(np.dot(Wf, concat) + bf)
    it = sigmoid(np.dot(Wi, concat) + bi)
    cct = np.tanh(np.dot(Wc, concat) + bc)
    c_next = ft * c_prev + it * cct
    ot = sigmoid(np.dot(Wo, concat) + bo)
    a_next = ot * np.tanh(c_next)

    yt_pred = softmax(np.dot(Wy, a_next) + by)

    # store values needed for backward propagation in cache
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)
    return a_next, c_next, yt_pred, cache
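
As with the RNN cell, a quick shape check on random inputs catches most wiring mistakes. A minimal sketch, assuming numpy is imported as np, sigmoid and softmax helpers like those sketched earlier are in scope, and the sizes n_x=3, n_a=5, n_y=2, m=10 are arbitrary:

np.random.seed(2)
n_x, n_a, n_y, m = 3, 5, 2, 10
xt = np.random.randn(n_x, m)
a_prev = np.random.randn(n_a, m)
c_prev = np.random.randn(n_a, m)
parameters = {
    "Wf": np.random.randn(n_a, n_a + n_x), "bf": np.random.randn(n_a, 1),
    "Wi": np.random.randn(n_a, n_a + n_x), "bi": np.random.randn(n_a, 1),
    "Wc": np.random.randn(n_a, n_a + n_x), "bc": np.random.randn(n_a, 1),
    "Wo": np.random.randn(n_a, n_a + n_x), "bo": np.random.randn(n_a, 1),
    "Wy": np.random.randn(n_y, n_a),       "by": np.random.randn(n_y, 1),
}
a_next, c_next, yt_pred, cache = lstm_cell_forward(xt, a_prev, c_prev, parameters)
print(a_next.shape, c_next.shape, yt_pred.shape)  # (5, 10) (5, 10) (2, 10)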