Пример #1
0
def lstm_unit(hidden_t_prev, cell_t_prev, gates,
              seq_lengths, timestep, forget_bias=0.0, drop_states=False):
    """Advance an LSTM by a single time step (NumPy reference).

    Shapes, as the code indexes them:
        cell_t_prev : (_, N, D)   reshaped to (N, D)
        gates       : (_, N, G)   with G == 4 * D, reshaped to (N, 4, D)
        seq_lengths : N entries   reshaped to (N, 1)

    The four gate slices are taken in the order input, forget, output,
    candidate. ``forget_bias`` is added to the forget pre-activation.

    Batch rows for which ``timestep`` is not below their sequence length
    keep their previous hidden/cell values, or are zeroed when
    ``drop_states`` is true.

    Returns ``(hidden_t, cell_t)``, each reshaped to (1, N, D).
    """
    width = cell_t_prev.shape[2]
    gate_width = gates.shape[2]
    batch = gates.shape[1]
    grid = (batch, width)

    # Expand the scalar step and the per-row lengths to full (N, D) grids
    # so that every later product is between equally shaped operands.
    step_grid = (np.ones(shape=grid) * timestep).astype(np.int32)
    assert step_grid.shape == grid
    length_grid = (seq_lengths.reshape(batch, 1) *
                   np.ones(shape=grid)).astype(np.int32)
    assert length_grid.shape == grid
    assert gate_width == 4 * width

    stacked = gates.reshape(batch, 4, width)
    prev_cell = cell_t_prev.reshape(grid)
    pre_in, pre_forget, pre_out, pre_cand = (
        stacked[:, k, :] for k in range(4)
    )

    in_gate = sigmoid(pre_in)
    forget_gate = sigmoid(pre_forget + forget_bias)
    out_gate = sigmoid(pre_out)
    candidate = tanh(pre_cand)

    # 1 where the row is still inside its sequence, 0 otherwise.
    mask = (step_grid < length_grid).astype(np.int32)
    assert mask.shape == grid
    finished = 1 - mask
    carry = 1 - drop_states

    fresh_cell = (forget_gate * prev_cell) + (in_gate * candidate)
    cell_t = fresh_cell * mask + finished * prev_cell * carry
    assert cell_t.shape == grid

    fresh_hidden = out_gate * tanh(cell_t)
    hidden_t = fresh_hidden * mask + hidden_t_prev * finished * carry

    return hidden_t.reshape(1, batch, width), cell_t.reshape(1, batch, width)
Пример #2
0
def gru_reference(input, hidden_input,
                  reset_gate_w, reset_gate_b,
                  update_gate_w, update_gate_b,
                  output_gate_w, output_gate_b,
                  seq_lengths, drop_states=False,
                  linear_before_reset=False):
    """Run a GRU over a whole sequence, one ``gru_unit`` call per step.

    ``input`` is indexed as (T, N, G) and each step is reshaped to
    (N, 3, D), where D is the last dimension of ``hidden_input``; the three
    slices are the input contributions to the reset, update and output
    gates. Each ``*_w`` matrix is transposed before being multiplied with
    the previous hidden state, and the matching ``*_b`` is added.

    ``linear_before_reset`` selects whether the reset gate scales the
    output-gate projection of the hidden state (true) or the hidden state
    before it is projected (false).

    Prints the dimensions once and the gate pre-activations at every step.

    Returns ``(all_hidden, last_hidden)``: the hidden states after each of
    the T steps, and the final one reshaped to (1, N, D).
    """
    def project(state, weight, bias):
        # Recurrent contribution of one gate: state . weight^T + bias.
        return np.dot(state, weight.T) + bias

    D = hidden_input.shape[-1]
    T = input.shape[0]
    N = input.shape[1]
    G = input.shape[2]
    print("Dimensions: T= ", T, " N= ", N, " G= ", G, " D= ", D)

    # Slot 0 holds the initial state; slot t + 1 the state after step t.
    states = np.zeros(shape=(T + 1, N, D))
    states[0] = hidden_input

    for t in range(T):
        per_gate = input[t].reshape(1, N, G).reshape(N, 3, D)
        prev_state = states[t].reshape(1, N, D)
        x_reset, x_update, x_output = (per_gate[:, k, :] for k in range(3))

        reset_gate = project(prev_state, reset_gate_w, reset_gate_b) + x_reset
        update_gate = (
            project(prev_state, update_gate_w, update_gate_b) + x_update
        )

        if linear_before_reset:
            output_gate = sigmoid(reset_gate) * project(
                prev_state, output_gate_w, output_gate_b)
        else:
            output_gate = project(
                prev_state * sigmoid(reset_gate),
                output_gate_w, output_gate_b)
        output_gate = output_gate + x_output

        gates_out_t = np.concatenate(
            (reset_gate, update_gate, output_gate), axis=2)
        print(reset_gate, update_gate, output_gate, gates_out_t, sep="\n")

        (next_state, ) = gru_unit(
            prev_state, gates_out_t, seq_lengths, t,
            drop_states=drop_states)
        states[t + 1] = next_state

    return states[1:], states[-1].reshape(1, N, D)
Пример #3
0
def gru_reference(input, hidden_input,
                  reset_gate_w, reset_gate_b,
                  update_gate_w, update_gate_b,
                  output_gate_w, output_gate_b,
                  seq_lengths, drop_states=False,
                  linear_before_reset=False):
    """Reference GRU: iterate ``gru_unit`` over every time step of ``input``.

    Dimensions are read from the arguments: T, N, G from the first three
    axes of ``input`` and D from the last axis of ``hidden_input``. Every
    step of ``input`` is viewed as (N, 3, D), giving the input-side terms
    for the reset, update and output gates in that order.

    With ``linear_before_reset`` the reset gate multiplies the already
    projected hidden state; otherwise it multiplies the hidden state
    before the output-gate projection.

    The function prints the dimensions and, per step, the three gate
    pre-activations plus their concatenation.

    Returns a pair: the hidden states produced by steps 0..T-1, and the
    last hidden state reshaped to (1, N, D).
    """
    last_axis = hidden_input.ndim - 1
    D = hidden_input.shape[last_axis]
    T = input.shape[0]
    N = input.shape[1]
    G = input.shape[2]
    print("Dimensions: T= ", T, " N= ", N, " G= ", G, " D= ", D)

    history = np.zeros(shape=(T + 1, N, D))
    history[0, :, :] = hidden_input

    w_reset_t = reset_gate_w.T
    w_update_t = update_gate_w.T
    w_output_t = output_gate_w.T

    for t in range(T):
        frame = input[t].reshape(1, N, G)
        h_prev = history[t].reshape(1, N, D)

        # View the frame as three per-gate input terms of shape (N, D).
        frame = frame.reshape(N, 3, D)
        in_r = frame[:, 0, :]
        in_z = frame[:, 1, :]
        in_h = frame[:, 2, :]

        pre_r = np.dot(h_prev, w_reset_t) + reset_gate_b
        pre_r = pre_r + in_r

        pre_z = np.dot(h_prev, w_update_t) + update_gate_b
        pre_z = pre_z + in_z

        r_act = sigmoid(pre_r)
        if linear_before_reset:
            pre_h = r_act * (np.dot(h_prev, w_output_t) + output_gate_b)
        else:
            pre_h = np.dot(h_prev * r_act, w_output_t) + output_gate_b
        pre_h = pre_h + in_h

        packed = np.concatenate((pre_r, pre_z, pre_h), axis=2)
        print(pre_r, pre_z, pre_h, packed, sep="\n")

        (h_next, ) = gru_unit(
            h_prev,
            packed,
            seq_lengths,
            t,
            drop_states=drop_states,
        )
        history[t + 1] = h_next

    final_state = history[-1].reshape(1, N, D)
    return (history[1:], final_state)
Пример #4
0
def gru_unit(*args, **kwargs):
    '''
    Implements one GRU unit, for one time step

    Positional arguments:
    hidden_t_prev, gates_out_t, seq_lengths, timestep
    or, when called with sequence_lengths=False:
    hidden_t_prev, gates_out_t, timestep

    Keyword arguments:
    drop_states       (default False) when true, rows that are past their
                      sequence length are zeroed instead of keeping
                      hidden_t_prev.
    sequence_lengths  (default True) whether seq_lengths is passed and
                      used to mask finished rows.

    Shapes:
    hidden_t_prev.shape     = (1, N, D)
    gates_out_t.shape       = (1, N, G), with G == 3 * D
    seq_lengths.shape       = (N,)

    Returns a 1-tuple (hidden_t, ) with hidden_t.shape == (1, N, D).
    '''

    drop_states = kwargs.get('drop_states', False)
    sequence_lengths = kwargs.get('sequence_lengths', True)

    if sequence_lengths:
        hidden_t_prev, gates_out_t, seq_lengths, timestep = args
    else:
        hidden_t_prev, gates_out_t, timestep = args

    N = hidden_t_prev.shape[1]
    D = hidden_t_prev.shape[2]
    G = gates_out_t.shape[2]
    assert G == 3 * D, "gates_out_t must hold three gates of width D"

    # Gates are stacked as [reset, update, output] along axis 1 after the
    # reshape. Only the update and output slices enter the hidden-state
    # update below; the reset slice (index 0) is not needed by this step.
    gates_out_t = gates_out_t.reshape(N, 3, D)
    update_gate_t = sigmoid(gates_out_t[:, 1, :].reshape(N, D))
    output_gate_t = tanh(gates_out_t[:, 2, :].reshape(N, D))

    if sequence_lengths:
        # Expand the step and per-row lengths to (N, D) grids and mark the
        # rows that are still inside their sequence.
        t = (timestep * np.ones(shape=(N, D))).astype(np.int32)
        assert t.shape == (N, D)
        seq_lengths = (np.ones(shape=(N, D)) *
                       seq_lengths.reshape(N, 1)).astype(np.int32)
        assert seq_lengths.shape == (N, D)
        valid = (t < seq_lengths).astype(np.int32)
    else:
        valid = np.ones(shape=(N, D))
    assert valid.shape == (N, D)

    hidden_t = update_gate_t * hidden_t_prev + (1 -
                                                update_gate_t) * output_gate_t
    # Finished rows carry the previous state forward (or zero if dropped).
    hidden_t = hidden_t * valid + hidden_t_prev * (1 - valid) * (1 -
                                                                 drop_states)
    hidden_t = hidden_t.reshape(1, N, D)

    return (hidden_t, )
Пример #5
0
def gru_unit(*args, **kwargs):
    '''
    Implements one GRU unit, for one time step

    Call as gru_unit(hidden_t_prev, gates_out_t, seq_lengths, timestep),
    or as gru_unit(hidden_t_prev, gates_out_t, timestep,
    sequence_lengths=False) to skip sequence-length masking.

    Keyword arguments:
    drop_states       default False; if true, rows whose timestep is not
                      below their sequence length yield zeros rather than
                      hidden_t_prev.
    sequence_lengths  default True; controls whether seq_lengths is
                      expected among the positional arguments.

    Shapes:
    hidden_t_prev.shape     = (1, N, D)
    gates_out_t.shape       = (1, N, G), G must equal 3 * D
    seq_lengths.shape       = (N,)

    Returns (hidden_t, ), a 1-tuple whose element has shape (1, N, D).
    '''

    drop_states = kwargs.get('drop_states', False)
    sequence_lengths = kwargs.get('sequence_lengths', True)

    if sequence_lengths:
        hidden_t_prev, gates_out_t, seq_lengths, timestep = args
    else:
        hidden_t_prev, gates_out_t, timestep = args

    N = hidden_t_prev.shape[1]
    D = hidden_t_prev.shape[2]
    G = gates_out_t.shape[2]
    assert G == 3 * D, "expected G == 3 * D"

    # After the reshape, axis 1 indexes the gates: 0 = reset, 1 = update,
    # 2 = output. The reset slice plays no part in the computation below,
    # so it is neither extracted nor activated here.
    gates_out_t = gates_out_t.reshape(N, 3, D)
    update_gate_t = gates_out_t[:, 1, :].reshape(N, D)
    output_gate_t = gates_out_t[:, 2, :].reshape(N, D)

    # Calculate gate outputs.
    update_gate_t = sigmoid(update_gate_t)
    output_gate_t = tanh(output_gate_t)

    if sequence_lengths:
        # The step grid is only needed when masking by sequence length.
        t = (timestep * np.ones(shape=(N, D))).astype(np.int32)
        assert t.shape == (N, D)
        seq_lengths = (np.ones(shape=(N, D)) *
                       seq_lengths.reshape(N, 1)).astype(np.int32)
        assert seq_lengths.shape == (N, D)
        valid = (t < seq_lengths).astype(np.int32)
    else:
        valid = np.ones(shape=(N, D))
    assert valid.shape == (N, D)

    hidden_t = update_gate_t * hidden_t_prev + (1 - update_gate_t) * output_gate_t
    # Rows with valid == 0 keep hidden_t_prev, or become 0 with drop_states.
    hidden_t = hidden_t * valid + hidden_t_prev * (1 - valid) * (1 - drop_states)
    hidden_t = hidden_t.reshape(1, N, D)

    return (hidden_t, )