Example #1
def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon lstm instance
    lstm = LSTM(hidden_size, Gaussian(), Tanh(), Logistic())
    inpa = lstm.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    out_bl = lstm.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = lstm.bprop(lstm.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_lstm(lstm)
        out_pos = lstm.fprop(lstm.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_lstm(lstm)
        out_neg = lstm.fprop(lstm.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale*out_pos)
        loss_neg = np.sum(rand_scale*out_neg)
        # compute the gradient estimate
        grad = 0.5*(loss_pos-loss_neg)/epsilon

        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del lstm
    return (grads_est, deltas_neon)
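The check above estimates input gradients by central differences: each element is perturbed by +/-epsilon, the scalar loss np.sum(rand_scale * out) is recomputed, and grad = (loss_pos - loss_neg) / (2 * epsilon) is compared against the analytic deltas returned by bprop. A minimal, self-contained sketch of the same idea on a plain NumPy function (numeric_grad, the toy tanh loss and the tolerance below are illustrative assumptions, not part of the neon test):

import numpy as np

def numeric_grad(f, x, eps=1e-5):
    """Central-difference estimate of d f(x) / d x for a scalar-valued f."""
    g = np.zeros_like(x)
    for i in range(x.size):
        saved = x.flat[i]
        x.flat[i] = saved + eps
        f_pos = f(x)
        x.flat[i] = saved - eps
        f_neg = f(x)
        x.flat[i] = saved                      # restore the perturbed element
        g.flat[i] = (f_pos - f_neg) / (2.0 * eps)
    return g

# toy check: loss(x) = sum(w * tanh(x)) has analytic gradient w * (1 - tanh(x)**2)
rng = np.random.RandomState(0)
x = rng.randn(4, 3)
w = rng.randn(4, 3)

def loss(x):
    return np.sum(w * np.tanh(x))

analytic = w * (1.0 - np.tanh(x) ** 2)
assert np.allclose(numeric_grad(loss, x), analytic, rtol=0.0, atol=1e-6)

In float64 the central-difference error shrinks roughly with epsilon**2, which is why a small absolute tolerance is enough when comparing grads_est against deltas_neon.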
Example #2
def gradient_calc(seq_len,
                  input_size,
                  hidden_size,
                  batch_size,
                  epsilon=None,
                  rand_scale=None,
                  inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon lstm instance
    lstm = LSTM(hidden_size, Gaussian(), Tanh(), Logistic())
    inpa = lstm.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    out_bl = lstm.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = lstm.bprop(lstm.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_lstm(lstm)
        out_pos = lstm.fprop(lstm.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_lstm(lstm)
        out_neg = lstm.fprop(lstm.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)
        # compute the gradient estimate
        grad = 0.5 * (loss_pos - loss_neg) / epsilon

        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del lstm
    return (grads_est, deltas_neon)
Example #3
def test_biLSTM_fprop_rnn(backend_default, fargs):

    # basic sanity check: compare BiLSTM against a unidirectional LSTM on random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(),
                    activation=Tanh(), init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # setup a unidirectional LSTM for reference
    init_glorot = GlorotUniform()
    rnn = LSTM(hidden_size, gate_activation=Logistic(),
               activation=Tanh(), init=init_glorot, reset_cells=True)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    # tie the BiLSTM backward weights and the reference LSTM weights to the BiLSTM forward weights
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0
    rnn.W_input[:] = bilstm.W_input_f
    rnn.W_recur[:] = bilstm.W_recur_f
    rnn.b[:] = bilstm.b_f
    rnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)
    inp_rnn = rnn.be.array(lr)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get()
    out_rnn = rnn.fprop(inp_rnn).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)
    out_rnn_s = get_steps(out_rnn, out_shape)

    # asserts for fprop
    for x_rnn, x_f, x_b, y_f, y_b in zip(out_rnn_s, out_lr_f_s, out_lr_b_s,
                                         reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, x_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, y_b, rtol=0.0, atol=1.0e-5)
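Both variants of this test lean on get_steps to slice the flattened (features, seq_len * batch_size) buffer into per-time-step column blocks, and build the reversed input rl by concatenating those blocks back-to-front (con is evidently an alias for np.concatenate). A rough sketch of an equivalent helper, named split_steps here to make clear it is an assumption rather than neon's own get_steps:

import numpy as np

def split_steps(x, shape):
    """Split a (features, seq_len * batch) array into seq_len per-step blocks."""
    _, seq_len = shape                          # shape is (features, seq_len), like in_shape above
    batch_size = x.shape[1] // seq_len
    return [x[:, t * batch_size:(t + 1) * batch_size] for t in range(seq_len)]

lr = np.arange(2 * 3 * 4).reshape(2, 12)        # input_size=2, seq_len=3, batch_size=4
rl = np.concatenate(list(reversed(split_steps(lr, (2, 3)))), axis=1)

Reversing the step blocks flips the sequence in time while leaving the within-step batch layout intact, which is what lets the test compare the BiLSTM's forward direction on lr against its backward direction on rl.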
Example #4
def test_biLSTM_fprop_rnn(backend_default, fargs):

    # basic sanity check: compare BiLSTM against a unidirectional LSTM on random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size,
                    gate_activation=Logistic(),
                    activation=Tanh(),
                    init=init_glorot,
                    reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # setup a unidirectional LSTM for reference
    init_glorot = GlorotUniform()
    rnn = LSTM(hidden_size,
               gate_activation=Logistic(),
               activation=Tanh(),
               init=init_glorot,
               reset_cells=True)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    # tie the BiLSTM backward weights and the reference LSTM weights to the BiLSTM forward weights
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0
    rnn.W_input[:] = bilstm.W_input_f
    rnn.W_recur[:] = bilstm.W_recur_f
    rnn.b[:] = bilstm.b_f
    rnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)
    inp_rnn = rnn.be.array(lr)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get()
    out_rnn = rnn.fprop(inp_rnn).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)
    out_rnn_s = get_steps(out_rnn, out_shape)

    # asserts for fprop
    for x_rnn, x_f, x_b, y_f, y_b in zip(out_rnn_s, out_lr_f_s, out_lr_b_s,
                                         reversed(out_rl_f_s),
                                         reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, x_f, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_rnn, y_b, rtol=0.0, atol=1.0e-5)
Example #5
def test_beamsearch(backend_default):
    """
    Simulated beam search on a minibatch of 2, over 4 time steps. The
    LSTM states are real, but the "softmax outputs" z are hardcoded and
    not taken from the network. The network outputs 6 tokens, with
    probabilities of the form exp(1), exp(5), exp(7).

    The test asserts that the score_lists assigned by _beamsearch_step(z_list)
    are equal to the probabilities computed manually by adding the scores
    from z_list.
    """
    be = backend_default

    batch_size = 2
    be.bsz = batch_size
    time_steps = 4
    nout = 6
    num_beams = 3

    # create unused layers
    activation = Tanh()
    gate_activation = Logistic()
    init_ary = np.eye(nout)
    init = Array(init_ary)
    encoder = LSTM(nout, init,
                   activation=activation, gate_activation=gate_activation,
                   name="Enc")
    decoder = LSTM(nout, init,
                   activation=activation, gate_activation=gate_activation,
                   name="Dec")

    class DummyFProp():
        """
        Constructs an artificial beam search example with known correct outputs.
        This is called inside a nested loop over time steps and live beams
        (num_life). In the first time step there is one live beam; after that,
        there are 3 live beams per step.
        There are 4 time steps total. Each beamsearch_step builds one list over
        num_life beams.

        At t=0, the winners for ex0 are 1, 4, 5 (indexed by their position) and
        winners for ex1 are 2,4,5. From there we continue the beam for ex0:
            12, 13, 14              6+2=8 6+3=9  6+2=8
            40, 43, 45  with scores 5+4=9 5+3=8  5+7=12 three new winners 45, 52, 55
            50, 52, 55              5+4=9 5+6=11 5+5=10

        for ex2
            1 4 5  with scores   5 4 7
        we get the three winners 1, 4, 5 and continue (just taking the
        3 in order, no sorting)
            10 12 13 14 (not unique!)  5+2=7  5+2=7  5+3=8
            41 42 43       with scores 4+6=10 4+5=9  4+7=11 winners  43 51 52
            51 52 53                   7+4=11 7+6=13 7+3=10 scores   11 11 13
        continue from the three winners 43 51 52
            431 433 434             11+10=21 11+3=14 11+9=20
            511 512 513 with scores 11+6=17  11+5=16 11+7=18  winners 431 434 520
            520 521 522             13+8=21  13+4=17 13+6=19  scores   21  20  21
        continue from three winners 431 511 513 (going along beams, the matches
        in a beam)
            4310 4312 4313 4314             21+2=23  21+2=23 21+3=24 21+10=31 (not unique!)
            4341 4342 4343      with scores 20+10=30 20+5=25 20+7=27        winners 4314 4341 5204
            5200 5202 5204                  21+8=29  21+6=27 21+10=31       scores    31   30   31
        overall winners are 4314 4341 5204

        """
        def __init__(self):
            self.i = -1
            # t=0
            #                                 X        x  x  <-- winners: 1, 4, 5  (for example 0)
            z = be.array(np.exp(np.array([[1, 6, 2, 1, 5, 5],
                                          [1, 5, 2, 2, 4, 7]]))).T

            # t=1
            #                                     x  x  x  <-- given we picked 4: new winners 2,3,4
            z1 = be.array(np.exp(np.array([[1, 1, 2, 3, 2, 1],
                                           [2, 1, 2, 3, 2, 1]]))).T
            #                               x        x     x  <-- given we picked 5:
            #                                                     new winners 0,3,[5]
            #                                                     score 12
            z2 = be.array(np.exp(np.array([[4, 1, 2, 3, 1, 7],
                                           [2, 6, 5, 7, 2, 4]]))).T
            #                               x     X        X  <-- given we picked 1:
            #                                                     new winners 0,[2],[5]
            #                                                     scores 12, 11
            z3 = be.array(np.exp(np.array([[4, 1, 6, 3, 1, 5],
                                           [1, 4, 6, 3, 2, 1]]))).T

            # t=2
            # example 0: given constructed (1, 5), score 11: 3, 4; scores 21, 20
            z4 = be.array(np.exp(np.array([[1, 1, 2, 10, 9, 1],
                                           [2, 10, 2, 3, 9, 1]]))).T
            # example 0: given constructed (5, 5), score 12: none selected from this beam
            z5 = be.array(np.exp(np.array([[4, 1, 2, 3, 1, 7],
                                           [2, 6, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 2), score 12: 1; score 20
            z6 = be.array(np.exp(np.array([[4, 8, 6, 3, 1, 5],
                                           [8, 4, 6, 3, 1, 1]]))).T

            # t=3
            # example 0: given constructed (1, 5, 4), score 20: 1, score 30
            z7 = be.array(np.exp(np.array([[1, 10, 2, 1, 1, 1],
                                           [2, 1, 2, 3, 10, 1]]))).T
            # example 0: given constructed (1, 2, 1), score 20: 5, score 30
            z8 = be.array(np.exp(np.array([[4, 1, 2, 3, 1, 10],
                                           [2, 10, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 5, 3), score 21: 4, score 31
            z9 = be.array(np.exp(np.array([[4, 8, 6, 3, 10, 5],
                                           [8, 4, 6, 3, 10, 1]]))).T

            self.z_list = [z, z1, z2, z3, z4, z5, z6, z7, z8, z9]

        def fprop(self, z, inference=True, init_state=None):
            self.i += 1
            return self.z_list[self.i]

    def final_state():
        return be.zeros_like(decoder.h[-1])

    class InObj(NervanaObject):
        def __init__(self):
            self.shape = (nout, time_steps)
            self.decoder_shape = (nout, time_steps)

    decoder.fprop = DummyFProp().fprop
    layers = Seq2Seq([encoder, decoder], decoder_connections=[0])
    layers.decoder._recurrent[0].final_state = final_state

    in_obj = InObj()
    layers.configure(in_obj)  # made zeros because zeros have shape
    layers.allocate()
    layers.allocate_deltas(None)
    beamsearch = BeamSearch(layers)
    inputs = be.iobuf(in_obj.shape)
    beamsearch.beamsearch(inputs, num_beams=num_beams)

    ex0 = np.array([[1, 5, 4, 1],
                    [1, 2, 1, 5],
                    [1, 5, 3, 4]])
    ex1 = np.array([[5, 1, 4, 4],
                    [5, 1, 1, 1],
                    [5, 2, 0, 4]])

    # extract all candidates
    examples = reformat_samples(beamsearch, num_beams, batch_size)
    assert allclose_with_out(examples[0], ex0)
    assert allclose_with_out(examples[1], ex1)
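The docstring's bookkeeping boils down to: at each step, add the per-token scores of every live beam to that beam's accumulated score and keep the best num_beams candidates. Below is a small stand-alone sketch of one such expansion step; expand_beams is a hypothetical helper, not neon's _beamsearch_step, but fed the t=1 numbers for ex0 from the docstring it reproduces the winners 45, 52, 55 with scores 12, 11, 10:

import numpy as np

def expand_beams(beam_scores, step_scores, num_beams):
    """One beam-search expansion step for a single example.
    beam_scores: (num_beams,) accumulated scores of the live beams.
    step_scores: (num_beams, vocab) per-token scores for extending each beam.
    Returns the best num_beams (beam_index, token_index, new_score) triples."""
    candidates = beam_scores[:, None] + step_scores     # accumulate scores, as in the docstring
    flat = candidates.ravel()
    best = np.argsort(flat)[::-1][:num_beams]           # highest accumulated scores win
    vocab = step_scores.shape[1]
    return [(int(i) // vocab, int(i) % vocab, float(flat[i])) for i in best]

# live beams "1", "4", "5" for ex0 at t=1 with scores 6, 5, 5; per-token scores
# taken from the docstring's arithmetic for that step
live_scores = np.array([6.0, 5.0, 5.0])
step_scores = np.array([[1, 1, 2, 3, 2, 1],
                        [4, 1, 2, 3, 1, 7],
                        [4, 1, 6, 3, 1, 5]], dtype=float)
print(expand_beams(live_scores, step_scores, num_beams=3))
# -> [(1, 5, 12.0), (2, 2, 11.0), (2, 5, 10.0)]: beams 45, 52, 55 with scores 12, 11, 10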
Example #6
def check_lstm(seq_len,
               input_size,
               hidden_size,
               batch_size,
               init_func,
               inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [ mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    hidden_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon LSTM
    lstm = LSTM(hidden_size,
                init_func,
                activation=Tanh(),
                gate_activation=Logistic())

    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = lstm.be.array(inp)
    # run neon fprop
    lstm.fprop(inpa)

    # reference numpy LSTM
    lstm_ref = RefLSTM()
    WLSTM = lstm_ref.init(input_size, hidden_size)

    # make ref weights and biases with neon model
    WLSTM[0, :] = lstm.b.get().T
    WLSTM[1:input_size + 1, :] = lstm.W_input.get().T
    WLSTM[input_size + 1:] = lstm.W_recur.get().T

    # transpose input X and do fprop
    inp_ref = inp.copy().T.reshape(seq_len, batch_size, input_size)
    (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref, WLSTM)

    # the output needs transpose as well
    Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
    IFOGf_ref = batch_cache['IFOGf'].reshape(seq_len * batch_size,
                                             hidden_size * 4).T
    Ct_ref = batch_cache['Ct'].reshape(seq_len * batch_size, hidden_size).T

    # compare results
    print('====Verifying IFOG====')
    allclose_with_out(lstm.ifog_buffer.get(), IFOGf_ref, rtol=0.0, atol=1.0e-5)

    print('====Verifying cell states====')
    allclose_with_out(lstm.c_act_buffer.get(), Ct_ref, rtol=0.0, atol=1.0e-5)

    print('====Verifying hidden states====')
    allclose_with_out(lstm.h_buffer.get(), Hout_ref, rtol=0.0, atol=1.0e-5)

    print('fprop is verified')

    # now test the bprop
    # generate random deltas tensor
    deltas = np.random.randn(*hidden_shape)

    lstm.bprop(lstm.be.array(deltas))
    # grab the delta W from gradient buffer
    dWinput_neon = lstm.dW_input.get()
    dWrecur_neon = lstm.dW_recur.get()
    db_neon = lstm.db.get()

    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size, hidden_size)
    (dX_ref, dWLSTM_ref, dc0_ref,
     dh0_ref) = lstm_ref.backward(deltas_ref, batch_cache)
    dWrecur_ref = dWLSTM_ref[-hidden_size:, :]
    dWinput_ref = dWLSTM_ref[1:input_size + 1, :]
    db_ref = dWLSTM_ref[0, :]
    dX_ref = dX_ref.reshape(seq_len * batch_size, input_size).T

    # compare results
    print('Making sure the neon LSTM matches the numpy LSTM in bprop')
    print('====Verifying update on W_recur====')

    assert allclose_with_out(dWrecur_neon,
                             dWrecur_ref.T,
                             rtol=0.0,
                             atol=1.0e-5)

    print('====Verifying update on W_input====')
    assert allclose_with_out(dWinput_neon,
                             dWinput_ref.T,
                             rtol=0.0,
                             atol=1.0e-5)

    print('====Verifying update on bias====')
    assert allclose_with_out(db_neon.flatten(), db_ref, rtol=0.0, atol=1.0e-5)

    print('====Verifying output delta====')
    assert allclose_with_out(lstm.out_deltas_buffer.get(),
                             dX_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    print('bprop is verified')

    return
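The weight copy near the top of check_lstm implies the packed layout the reference implementation expects: a single (1 + input_size + hidden_size, 4 * hidden_size) matrix with the bias in row 0, the input weights in the next input_size rows, and the recurrent weights in the remaining rows, so one matrix product yields all four gate pre-activations for a step. A shape-only sketch of that packing (the i, f, o, g column ordering is an assumption, not read from RefLSTM):

import numpy as np

input_size, hidden_size, batch_size = 3, 5, 2
WLSTM = np.zeros((1 + input_size + hidden_size, 4 * hidden_size))

x_t = np.random.randn(batch_size, input_size)           # input at one time step
h_prev = np.random.randn(batch_size, hidden_size)       # previous hidden state

# prepend a column of ones so row 0 of WLSTM acts as the bias
z_t = np.hstack([np.ones((batch_size, 1)), x_t, h_prev]).dot(WLSTM)
assert z_t.shape == (batch_size, 4 * hidden_size)       # i, f, o, g pre-activations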
Example #7
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [ mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    hidden_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon LSTM
    lstm = LSTM(hidden_size,
                init_func,
                activation=Tanh(),
                gate_activation=Logistic())

    inp = np.random.rand(*input_shape)*inp_moms[1] + inp_moms[0]
    inpa = lstm.be.array(inp)
    # run neon fprop
    lstm.configure((input_size, seq_len))
    lstm.prev_layer = True  # Hack to force allocating a delta buffer
    lstm.allocate()
    lstm.set_deltas([lstm.be.iobuf(lstm.in_shape)])
    lstm.fprop(inpa)

    # reference numpy LSTM
    lstm_ref = RefLSTM()
    WLSTM = lstm_ref.init(input_size, hidden_size)

    # make ref weights and biases with neon model
    WLSTM[0, :] = lstm.b.get().T
    WLSTM[1:input_size+1, :] = lstm.W_input.get().T
    WLSTM[input_size+1:] = lstm.W_recur.get().T

    # transpose input X and do fprop
    inp_ref = inp.copy().T.reshape(seq_len, batch_size, input_size)
    (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                             WLSTM)

    # the output needs transpose as well
    Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
    IFOGf_ref = batch_cache['IFOGf'].reshape(seq_len * batch_size, hidden_size * 4).T
    Ct_ref = batch_cache['Ct'].reshape(seq_len * batch_size, hidden_size).T

    # compare results
    print('====Verifying IFOG====')
    allclose_with_out(lstm.ifog_buffer.get(),
                      IFOGf_ref,
                      rtol=0.0,
                      atol=1.0e-5)

    print('====Verifying cell states====')
    allclose_with_out(lstm.c_act_buffer.get(),
                      Ct_ref,
                      rtol=0.0,
                      atol=1.0e-5)

    print('====Verifying hidden states====')
    allclose_with_out(lstm.outputs.get(),
                      Hout_ref,
                      rtol=0.0,
                      atol=1.0e-5)

    print('fprop is verified')

    # now test the bprop
    # generate random deltas tensor
    deltas = np.random.randn(*hidden_shape)

    lstm.bprop(lstm.be.array(deltas))
    # grab the delta W from gradient buffer
    dWinput_neon = lstm.dW_input.get()
    dWrecur_neon = lstm.dW_recur.get()
    db_neon = lstm.db.get()

    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size, hidden_size)
    (dX_ref, dWLSTM_ref, dc0_ref, dh0_ref) = lstm_ref.backward(deltas_ref,
                                                               batch_cache)
    dWrecur_ref = dWLSTM_ref[-hidden_size:, :]
    dWinput_ref = dWLSTM_ref[1:input_size+1, :]
    db_ref = dWLSTM_ref[0, :]
    dX_ref = dX_ref.reshape(seq_len * batch_size, input_size).T

    # compare results
    print('Making sure the neon LSTM matches the numpy LSTM in bprop')
    print('====Verifying update on W_recur====')

    assert allclose_with_out(dWrecur_neon,
                             dWrecur_ref.T,
                             rtol=0.0,
                             atol=1.0e-5)

    print('====Verifying update on W_input====')
    assert allclose_with_out(dWinput_neon,
                             dWinput_ref.T,
                             rtol=0.0,
                             atol=1.0e-5)

    print('====Verifying update on bias====')
    assert allclose_with_out(db_neon.flatten(),
                             db_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    print('====Verifying output delta====')
    assert allclose_with_out(lstm.out_deltas_buffer.get(),
                             dX_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    print('bprop is verified')

    return
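For reference, the recurrence these tests exercise fits in a few lines of NumPy: Logistic gates, a Tanh candidate and output activation, and the W_input / W_recur / b parameter names used above. The gate ordering and the (4 * hidden_size, ...) weight shapes in this sketch are assumptions for illustration, not read from neon's internals:

import numpy as np

def lstm_step(x_t, h_prev, c_prev, W_input, W_recur, b):
    """One LSTM time step. W_input: (4*hidden, input), W_recur: (4*hidden, hidden),
    b: (4*hidden, 1); x_t, h_prev, c_prev are laid out as (features, batch)."""
    hidden = h_prev.shape[0]
    pre = W_input.dot(x_t) + W_recur.dot(h_prev) + b    # all gate pre-activations at once
    i, f, o = (1.0 / (1.0 + np.exp(-pre[k * hidden:(k + 1) * hidden])) for k in range(3))
    g = np.tanh(pre[3 * hidden:])                       # candidate cell state
    c_t = f * c_prev + i * g
    h_t = o * np.tanh(c_t)
    return h_t, c_t

# unroll a few steps with random parameters
hidden, nin, bsz = 4, 3, 2
rng = np.random.RandomState(0)
W_in, W_rec = rng.randn(4 * hidden, nin), rng.randn(4 * hidden, hidden)
b = rng.randn(4 * hidden, 1)
h = np.zeros((hidden, bsz))
c = np.zeros((hidden, bsz))
for x_t in rng.randn(5, nin, bsz):
    h, c = lstm_step(x_t, h, c, W_in, W_rec, b)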
Example #8
def test_beamsearch(backend_default):
    """
    Simulated beam search on a minibatch of 2, over 4 time steps. The
    LSTM states are real, but the "softmax outputs" z are hardcoded and
    not taken from the network. The network outputs 6 tokens, with
    probabilities of the form exp(1), exp(5), exp(7).

    The test asserts that the score_lists assigned by _beamsearch_step(z_list)
    are equal to the probabilities computed manually by adding the scores
    from z_list.
    """
    be = backend_default

    batch_size = 2
    be.bsz = batch_size
    time_steps = 4
    nout = 6
    num_beams = 3

    # create unused layers
    activation = Tanh()
    gate_activation = Logistic()
    init_ary = np.eye(nout)
    init = Array(init_ary)
    encoder = LSTM(nout,
                   init,
                   activation=activation,
                   gate_activation=gate_activation,
                   name="Enc")
    decoder = LSTM(nout,
                   init,
                   activation=activation,
                   gate_activation=gate_activation,
                   name="Dec")

    class DummyFProp():
        """
        Constructs an artificial beam search example with known correct outputs.
        This is called inside a nested loop over time steps and live beams
        (num_life). In the first time step there is one live beam; after that,
        there are 3 live beams per step.
        There are 4 time steps total. Each beamsearch_step builds one list over
        num_life beams.

        At t=0, the winners for ex0 are 1, 4, 5 (indexed by their position) and
        winners for ex1 are 2,4,5. From there we continue the beam for ex0:
            12, 13, 14              6+2=8 6+3=9  6+2=8
            40, 43, 45  with scores 5+4=9 5+3=8  5+7=12 three new winners 45, 52, 55
            50, 52, 55              5+4=9 5+6=11 5+5=10

        for ex2
            1 4 5  with scores   5 4 7
        we get the three winners 1, 4, 5 and continue (just taking the
        3 in order, no sorting)
            10 12 13 14 (not unique!)  5+2=7  5+2=7  5+3=8
            41 42 43       with scores 4+6=10 4+5=9  4+7=11 winners  43 51 52
            51 52 53                   7+4=11 7+6=13 7+3=10 scores   11 11 13
        continue from the three winners 43 51 52
            431 433 434             11+10=21 11+3=14 11+9=20
            511 512 513 with scores 11+6=17  11+5=16 11+7=18  winners 431 434 520
            520 521 522             13+8=21  13+4=17 13+6=19  scores   21  20  21
        continue from three winners 431 511 513 (going along beams, the matches
        in a beam)
            4310 4312 4313 4314             21+2=23  21+2=23 21+3=24 21+10=31 (not unique!)
            4341 4342 4343      with scores 20+10=30 20+5=25 20+7=27        winners 4314 4341 5204
            5200 5202 5204                  21+8=29  21+6=27 21+10=31       scores    31   30   31
        overall winners are 4314 4341 5204

        """
        def __init__(self):
            self.i = -1
            # t=0
            #                                 X        x  x  <-- winners: 1, 4, 5  (for example 0)
            z = be.array(
                np.exp(np.array([[1, 6, 2, 1, 5, 5], [1, 5, 2, 2, 4, 7]]))).T

            # t=1
            #                                     x  x  x  <-- given we picked 4: new winners 2,3,4
            z1 = be.array(
                np.exp(np.array([[1, 1, 2, 3, 2, 1], [2, 1, 2, 3, 2, 1]]))).T
            #                               x        x     x  <-- given we picked 5:
            #                                                     new winners 0,3,[5]
            #                                                     score 12
            z2 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 7], [2, 6, 5, 7, 2, 4]]))).T
            #                               x     X        X  <-- given we picked 1:
            #                                                     new winners 0,[2],[5]
            #                                                     scores 12, 11
            z3 = be.array(
                np.exp(np.array([[4, 1, 6, 3, 1, 5], [1, 4, 6, 3, 2, 1]]))).T

            # t=2
            # example 0: given constructed (1, 5), score 11: 3, 4; scores 21, 20
            z4 = be.array(
                np.exp(np.array([[1, 1, 2, 10, 9, 1], [2, 10, 2, 3, 9, 1]]))).T
            # example 0: given constructed (5, 5), score 12: none selected from this beam
            z5 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 7], [2, 6, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 2), score 12: 1; score 20
            z6 = be.array(
                np.exp(np.array([[4, 8, 6, 3, 1, 5], [8, 4, 6, 3, 1, 1]]))).T

            # t=3
            # example 0: given constructed (1, 5, 4), score 20: 1, score 30
            z7 = be.array(
                np.exp(np.array([[1, 10, 2, 1, 1, 1], [2, 1, 2, 3, 10, 1]]))).T
            # example 0: given constructed (1, 2, 1), score 20: 5, score 30
            z8 = be.array(
                np.exp(np.array([[4, 1, 2, 3, 1, 10], [2, 10, 5, 7, 2, 4]]))).T
            # example 0: given constructed (1, 5, 3), score 21: 4, score 31
            z9 = be.array(
                np.exp(np.array([[4, 8, 6, 3, 10, 5], [8, 4, 6, 3, 10, 1]]))).T

            self.z_list = [z, z1, z2, z3, z4, z5, z6, z7, z8, z9]

        def fprop(self, z, inference=True, init_state=None):
            self.i += 1
            return self.z_list[self.i]

    def final_state():
        return be.zeros_like(decoder.h[-1])

    class InObj(NervanaObject):
        def __init__(self):
            self.shape = (nout, time_steps)
            self.decoder_shape = (nout, time_steps)

    decoder.fprop = DummyFProp().fprop
    layers = Seq2Seq([encoder, decoder], decoder_connections=[0])
    layers.decoder._recurrent[0].final_state = final_state

    in_obj = InObj()
    layers.configure(in_obj)  # made zeros because zeros have shape
    layers.allocate()
    layers.allocate_deltas(None)
    beamsearch = BeamSearch(layers)
    inputs = be.iobuf(in_obj.shape)
    beamsearch.beamsearch(inputs, num_beams=num_beams)

    ex0 = np.array([[1, 5, 4, 1], [1, 2, 1, 5], [1, 5, 3, 4]])
    ex1 = np.array([[5, 1, 4, 4], [5, 1, 1, 1], [5, 2, 0, 4]])

    # extract all candidates
    examples = reformat_samples(beamsearch, num_beams, batch_size)
    assert allclose_with_out(examples[0], ex0)
    assert allclose_with_out(examples[1], ex1)