Example #1
import numpy as np
import nnabla as nn
import nnabla.functions as F


def execute_fixed_length_lstm(xs_np, h0_np, c0_np, w0_np, w_np, b_np, num_layers=1, dropout=0.0, bidirectional=False, training=True):
    # Inputs are numpy arrays
    num_directions = 2 if bidirectional else 1
    seq_len = xs_np.shape[0]
    batch_size = xs_np.shape[1]
    hidden_size = h0_np.shape[3]

    xs = nn.Variable.from_numpy_array(xs_np)
    h0 = nn.Variable.from_numpy_array(h0_np)
    c0 = nn.Variable.from_numpy_array(c0_np)
    w0 = nn.Variable.from_numpy_array(w0_np)
    w = None
    b = None
    with_bias = False
    if num_layers > 1:
        w = nn.Variable.from_numpy_array(w_np)
    if isinstance(b_np, np.ndarray):
        b = nn.Variable.from_numpy_array(b_np)
        with_bias = True

    ys, hn, cn = _create_fixed_length_lstm(
        xs, h0, c0, w0, w, b, num_layers, num_directions, with_bias)  # returns Variables
    dummy = F.sink(ys, hn, cn)
    dummy.forward()

    # returns numpy arrays
    ys = F.reshape(ys, (seq_len, batch_size, num_directions * hidden_size))
    ys.forward()
    return ys.d, hn.d, cn.d
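
A minimal call sketch; the array shapes follow nnabla's F.lstm weight layout (weight_l0: (num_directions, 4, hidden_size, input_size + hidden_size), bias: (num_layers, num_directions, 4, hidden_size)), and the concrete sizes are assumptions for illustration:

seq_len, batch_size, input_size, hidden_size = 5, 3, 4, 6
num_layers, num_directions = 1, 1

rng = np.random.RandomState(0)
xs_np = rng.randn(seq_len, batch_size, input_size).astype(np.float32)
h0_np = rng.randn(num_layers, num_directions, batch_size, hidden_size).astype(np.float32)
c0_np = rng.randn(num_layers, num_directions, batch_size, hidden_size).astype(np.float32)
w0_np = rng.randn(num_directions, 4, hidden_size, input_size + hidden_size).astype(np.float32)
b_np = rng.randn(num_layers, num_directions, 4, hidden_size).astype(np.float32)

ys_d, hn_d, cn_d = execute_fixed_length_lstm(xs_np, h0_np, c0_np, w0_np, None, b_np)
# ys_d.shape == (seq_len, batch_size, num_directions * hidden_size)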
Example #2
def get_lstm_grad(xs_np, h0_np, c0_np, w0_np, w_np, b_np, dy, dh, dc, num_layers=1, dropout=0.0, bidirectional=False, training=True, **kw):
    num_directions = 2 if bidirectional else 1
    seq_len = xs_np.shape[0]
    batch_size = xs_np.shape[1]
    hidden_size = h0_np.shape[3]

    xs = nn.Variable.from_numpy_array(xs_np, need_grad=True)
    h0 = nn.Variable.from_numpy_array(h0_np, need_grad=True)
    c0 = nn.Variable.from_numpy_array(c0_np, need_grad=True)
    w0 = nn.Variable.from_numpy_array(w0_np, need_grad=True)
    w = None
    b = None
    with_bias = False
    if num_layers > 1:
        w = nn.Variable.from_numpy_array(w_np, need_grad=True)
    if isinstance(b_np, np.ndarray):
        b = nn.Variable.from_numpy_array(b_np, need_grad=True)
        with_bias = True
    # Clear gradient buffers before accumulating in backward().
    xs.grad.zero()
    h0.grad.zero()
    c0.grad.zero()
    w0.grad.zero()
    if num_layers > 1:
        w.grad.zero()
    if with_bias:
        b.grad.zero()

    ys, hn, cn = _create_fixed_length_lstm(
        xs, h0, c0, w0, w, b, num_layers, num_directions, with_bias)  # returns Variables

    dummy = F.sink(ys, hn, cn, one_input_grad=False)
    dummy.forward()
    ys.g = np.reshape(dy, ys.shape)
    hn.g = dh
    cn.g = dc
    dummy.backward()

    grads = [xs.g, h0.g, c0.g, w0.g]
    if num_layers > 1:
        grads.append(w.g)
    if with_bias:
        grads.append(b.g)
    return np.concatenate([g.flatten() for g in grads])
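
The flat return vector pairs naturally with numerical gradient checking. A sketch (reusing the arrays from the example above) of splitting the concatenated gradient back into per-input pieces:

# Upstream gradients, matching the shapes of ys, hn, cn.
dy = rng.randn(seq_len, batch_size, num_directions * hidden_size).astype(np.float32)
dh = rng.randn(*h0_np.shape).astype(np.float32)
dc = rng.randn(*c0_np.shape).astype(np.float32)

flat = get_lstm_grad(xs_np, h0_np, c0_np, w0_np, None, b_np, dy, dh, dc)

# Recover per-input gradients from the concatenated vector.
sizes = [a.size for a in (xs_np, h0_np, c0_np, w0_np, b_np)]
dxs, dh0, dc0, dw0, db = np.split(flat, np.cumsum(sizes)[:-1])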
Example #3
def lstm_backward(inputs,
                  num_layers=1,
                  dropout=None,
                  bidirectional=False,
                  training=True):
    """
    Args:
      inputs (list of nn.Variable): Incomming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if dropout != 0.0:
        raise ValueError("Dropout must be 0.0")

    # Upstream gradients of (ys, hn, cn), followed by the forward inputs.
    dys = inputs[0]
    dhn = inputs[1]
    dcn = inputs[2]
    xs0 = inputs[3]
    h0 = inputs[4]
    c0 = inputs[5]
    w0 = inputs[6]

    if num_layers == 1:
        w = None
        b = inputs[7] if len(inputs) == 8 else None
    else:
        w = inputs[7]
        b = inputs[8] if len(inputs) == 9 else None
    num_directions = 2 if bidirectional else 1
    with_bias = b is not None

    ys, hn, cn = _create_fixed_length_lstm(xs0, h0, c0, w0, w, b, num_layers,
                                           num_directions, with_bias)
    outputs = [ys, hn, cn]
    grad_outputs = [dys, dhn, dcn]
    inputs = [xs0, h0, c0, w0]
    if w is not None:
        inputs.append(w)
    if b is not None:
        inputs.append(b)
    return nn.grad(outputs, inputs, grad_outputs=grad_outputs)
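
lstm_backward delegates the differentiation itself to nn.grad, which builds a new graph computing the requested gradients. A minimal sketch of the same pattern on a toy graph:

import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable.from_numpy_array(np.array([1.0, 2.0, 3.0], dtype=np.float32), need_grad=True)
y = F.sum(x ** 2)

# nn.grad returns Variables whose graph computes dy/dx; forward them to evaluate.
dx = nn.grad([y], [x])[0]
dx.forward()
print(dx.d)  # [2. 4. 6.]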