Example #1
# shared imports for the examples below (module paths assumed from neon's
# recurrent-layer tests; con aliases numpy.concatenate there)
import numpy as np
from numpy import concatenate as con
from neon import NervanaObject
from neon.initializers import GlorotUniform
from neon.layers.recurrent import BiLSTM, BiRNN, Recurrent, get_steps
from neon.transforms import Logistic, Rectlinclip, Tanh
from utils import allclose_with_out  # helper from neon's test suite


def test_biRNN_bprop(backend_default, fargs, deltas_buffer):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size,
                  activation=Rectlinclip(slope=0),
                  init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    birnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    birnn.set_deltas(deltas_buffer)

    # tie the backward-direction weights to the forward-direction weights
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # set up a plain uni-directional rnn with the same configuration
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size,
                    activation=Rectlinclip(slope=0),
                    init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    rnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    rnn.set_deltas(deltas_buffer)

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr_g = birnn.fprop(inp_lr)
    del_lr = birnn.bprop(out_lr_g).get().copy()
    birnn.h_buffer[:] = 0
    out_rl_g = birnn.fprop(inp_rl)
    del_rl = birnn.bprop(out_rl_g).get().copy()

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
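
The reversed input rl in the test above is built by viewing the flat
(input_size, seq_len * batch_size) buffer as a list of per-step slices and
concatenating them in reverse. A minimal NumPy sketch of that round trip,
where split_steps is a stand-in for what the test uses get_steps for (an
assumption, not neon's implementation):

import numpy as np

def split_steps(x, in_shape):
    # stand-in for get_steps: split a (features, steps * batch) buffer
    # into one (features, batch) view per time step
    _, steps = in_shape
    bsz = x.shape[1] // steps
    return [x[:, i * bsz:(i + 1) * bsz] for i in range(steps)]

seq_len, input_size, batch_size = 5, 3, 2
lr = np.random.random((input_size, seq_len * batch_size))

# flip the sequence along the time axis, keeping each step's batch layout
rl = np.concatenate(list(reversed(split_steps(lr, (input_size, seq_len)))), axis=1)

# flipping twice recovers the original buffer
back = np.concatenate(list(reversed(split_steps(rl, (input_size, seq_len)))), axis=1)
assert np.allclose(lr, back)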
Example #2
def test_biLSTM_fprop(backend_default, fargs):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size,
                    gate_activation=Logistic(),
                    init=init_glorot,
                    activation=Tanh(),
                    reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # same weight
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()

    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s, reversed(out_rl_f_s),
                                  reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example #3
def test_biRNN_fprop(backend_default, fargs):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size,
                  activation=Rectlinclip(slope=0),
                  init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # same weight
    nout = hidden_size
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr = birnn.fprop(inp_lr).get().copy()

    birnn.h_buffer[:] = 0
    out_rl = birnn.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s, reversed(out_rl_f_s),
                                  reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example #4
def test_biLSTM_fprop(backend_default, fargs):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(), init=init_glorot,
                    activation=Tanh(), reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()
    bilstm.set_deltas([bilstm.be.iobuf(bilstm.in_shape)])

    # same weight
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()

    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example #5
def test_biRNN_fprop(backend_default, fargs):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    # same weight
    nout = hidden_size
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr = birnn.fprop(inp_lr).get().copy()

    birnn.h_buffer[:] = 0
    out_rl = birnn.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example #6
def test_biLSTM_bprop(backend_default, fargs):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(),
                    activation=Tanh(), init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()
    bilstm.set_deltas([bilstm.be.iobuf(bilstm.in_shape)])

    # tie the backward-direction weights to the forward-direction weights
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr_g = bilstm.fprop(inp_lr)
    out_lr = out_lr_g.get().copy()
    del_lr = bilstm.bprop(out_lr_g).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl_g = bilstm.fprop(inp_rl)
    out_rl = out_rl_g.get().copy()
    del_rl = bilstm.bprop(out_rl_g).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
Example #7
    def init_buffers(self, inputs):
        """
        Initialize buffers for recurrent internal units and outputs.
        Buffers are initialized as 2D tensors with the second dimension being
        steps * batch_size. A list of views is created on each buffer for easy
        manipulation of the data belonging to a given time step.

        Arguments:
            inputs (Tensor): input data as 2D tensor. The dimension is
                             (input_size, sequence_length * batch_size)

        """
        if self.x is None or self.x is not inputs:
            self.x = inputs
            self.xs = get_steps(inputs, self.in_shape)
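
The identity test (self.x is not inputs) makes the view construction lazy:
the per-step views are rebuilt only when a different input tensor is handed
in, and repeated calls on the same buffer reuse the cached list. A small
sketch of the same pattern with illustrative names (not neon's API):

import numpy as np

class StepViewCache(object):
    """Lazily split a (features, steps * batch) buffer into per-step views."""

    def __init__(self, steps):
        self.steps = steps
        self.x = None
        self.xs = None

    def views(self, inputs):
        # rebuild the views only when a different buffer object arrives
        if self.x is None or self.x is not inputs:
            self.x = inputs
            bsz = inputs.shape[1] // self.steps
            self.xs = [inputs[:, i * bsz:(i + 1) * bsz]
                       for i in range(self.steps)]
        return self.xs

cache = StepViewCache(steps=4)
buf = np.zeros((3, 8))
assert cache.views(buf) is cache.views(buf)  # second call hits the cache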
Example #8
    def set_deltas(self, delta_buffers):
        """
        Use the pre-allocated list of buffers (provided by layer containers)
        for the backpropagated error. Deltas are only set for layers that own
        their own deltas; layers that work in-place (e.g., bias and
        activation) do not own deltas, so no space is set up for them.

        Arguments:
            delta_buffers (list): list of pre-allocated tensors (provided by layer container)
        """
        super(TimeDistributedRecurrentOutput, self).set_deltas(delta_buffers)
        self.deltas_buffer = self.deltas
        if self.deltas:
            self.deltas = get_steps(self.deltas_buffer, self.in_shape)
        else:
            self.deltas = []  # for simplifying bprop notation
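
Falling back to an empty list instead of None keeps the backward pass free of
special cases: a per-step loop over self.deltas simply runs zero iterations
when the layer does not own deltas. A minimal sketch of that convention
(bprop_steps is a hypothetical helper, not a neon function):

import numpy as np

def bprop_steps(step_errors, deltas):
    # deltas is either a list of per-step views or [] for layers that
    # do not own deltas; zip() turns the copy into a no-op in that case
    for err, delta in zip(step_errors, deltas):
        delta[:] = err

errors = [np.ones((4, 2)) for _ in range(3)]
owned = [np.zeros((4, 2)) for _ in range(3)]
bprop_steps(errors, owned)  # writes into the owned views
bprop_steps(errors, [])     # in-place layer: nothing to write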
Example #9
def test_biLSTM_bprop(backend_default, fargs, deltas_buffer):

    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(),
                    activation=Tanh(), init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    bilstm.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    bilstm.set_deltas(deltas_buffer)

    # tie the backward-direction weights to the forward-direction weights
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr_g = bilstm.fprop(inp_lr)
    out_lr = out_lr_g.get().copy()
    del_lr = bilstm.bprop(out_lr_g).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl_g = bilstm.fprop(inp_rl)
    out_rl = out_rl_g.get().copy()
    del_rl = bilstm.bprop(out_rl_g).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert allclose_with_out(x, y, rtol=0.0, atol=1.0e-5)
Example #10
    def bprop(self, error, alpha=1.0, beta=0.0):
        """
        Backpropagate the errors, compute the deltas for the previous layer,
        and calculate the updates to the model parameters.

        Arguments:
            error (list[Tensor]): error tensors for each time step
                                  of unrolling
            alpha (float, optional): scale to apply to input for activation
                                     gradient bprop.  Defaults to 1.0
            beta (float, optional): scale to apply to output activation
                                    gradient bprop.  Defaults to 0.0

        Returns:
            Tensor: Backpropagated errors for each time step of model unrolling
        """
        self.dW[:] = 0

        if self.in_deltas_f is None:
            self.in_deltas_f = get_steps(error[:self.o_shape[0]], self.o_shape)
            self.prev_in_deltas = self.in_deltas_f[-1:] + self.in_deltas_f[:-1]
            self.ifog_delta_last_steps = self.ifog_delta_buffer[:, self.be.bsz:]
            self.h_first_steps = self.h_buffer_f[:, :-self.be.bsz]
            # h_delta[5] * h[4] + h_delta[4] * h[3] + ... + h_delta[1] * h[0]

        if self.in_deltas_b is None:
            self.in_deltas_b = get_steps(error[self.o_shape[0]:], self.o_shape)
            self.next_in_deltas = self.in_deltas_b[1:] + self.in_deltas_b[:1]
            self.ifog_delta_first_steps = self.ifog_delta_buffer[:, :-self.be.bsz]
            self.h_last_steps = self.h_buffer_b[:, self.be.bsz:]
            # h_delta[0] * h[1] + h_delta[1] * h[2] + ... + h_delta[4] * h[5]

        params_f = (self.in_deltas_f, self.prev_in_deltas,
                    self.i_f, self.f_f, self.o_f, self.g_f,
                    self.ifog_delta, self.i_delta, self.f_delta, self.o_delta, self.g_delta,
                    self.c_delta, self.c_delta_prev, self.c_prev_bprop, self.c_act_f)

        params_b = (self.in_deltas_b, self.next_in_deltas,
                    self.i_b, self.f_b, self.o_b, self.g_b,
                    self.ifog_delta, self.i_delta, self.f_delta, self.o_delta, self.g_delta,
                    self.c_delta, self.c_delta_next, self.c_next_bprop, self.c_act_b)

        # bprop through the forward-direction connections; error flows from right to left
        self.c_delta_buffer[:] = 0
        self.ifog_delta_buffer[:] = 0
        self.ifog_delta_f = None
        self.ifog_delta_b = None
        for idx, (in_deltas, prev_in_deltas,
                  i, f, o, g,
                  ifog_delta, i_delta, f_delta, o_delta, g_delta,
                  c_delta, c_delta_prev, c_prev, c_act) \
                in enumerate(reversed(list(zip(*params_f)))):

            # current cell delta
            c_delta[:] = c_delta + \
                         self.activation.bprop(c_act) * (o * in_deltas)
            i_delta[:] = self.gate_activation.bprop(i) * c_delta * g
            f_delta[:] = self.gate_activation.bprop(f) * c_delta * c_prev
            o_delta[:] = self.gate_activation.bprop(o) * in_deltas * c_act
            g_delta[:] = self.activation.bprop(g) * c_delta * i

            # bprop the errors to prev_in_delta and c_delta_prev
            self.be.compound_dot(
                    self.W_recur_f.T, ifog_delta, prev_in_deltas, beta=1.0)
            if c_delta_prev is not None:
                c_delta_prev[:] = c_delta * f

        # Weight deltas and accumulate
        self.be.compound_dot(
                self.ifog_delta_last_steps, self.h_first_steps.T, self.dW_recur_f)
        self.be.compound_dot(
                self.ifog_delta_buffer, self.x_f.T, self.dW_input_f)
        self.db_f[:] = self.be.sum(self.ifog_delta_buffer, axis=1)
        # out deltas to input units
        if self.out_deltas_buffer:
            self.be.compound_dot(
                    self.W_input_f.T, self.ifog_delta_buffer,
                    self.out_deltas_buffer_f_v,
                    alpha=alpha, beta=beta)

        # bprop through the backward-direction connections; error flows from left to right
        self.c_delta_buffer[:] = 0
        self.ifog_delta_buffer[:] = 0
        for idx, (in_deltas, next_in_deltas,
                  i, f, o, g,
                  ifog_delta, i_delta, f_delta, o_delta, g_delta,
                  c_delta, c_delta_next, c_next, c_act) \
                in enumerate(zip(*params_b)):

            # current cell delta
            c_delta[:] = c_delta + \
                         self.activation.bprop(c_act) * (o * in_deltas)
            i_delta[:] = self.gate_activation.bprop(i) * c_delta * g
            f_delta[:] = self.gate_activation.bprop(f) * c_delta * c_next
            o_delta[:] = self.gate_activation.bprop(o) * in_deltas * c_act
            g_delta[:] = self.activation.bprop(g) * c_delta * i

            # bprop the errors to next_in_delta and c_next_delta
            self.be.compound_dot(
                    self.W_recur_b.T, ifog_delta, next_in_deltas, beta=1.0)
            if c_delta_next is not None:
                c_delta_next[:] = c_delta * f

        # Weight deltas and accumulate
        self.be.compound_dot(
                self.ifog_delta_first_steps, self.h_last_steps.T, self.dW_recur_b)
        self.be.compound_dot(
                self.ifog_delta_buffer, self.x_b.T, self.dW_input_b)
        self.db_b[:] = self.be.sum(self.ifog_delta_buffer, axis=1)
        # out deltas to input units. bprop to the same inputs if
        # split_inputs=False
        if self.out_deltas_buffer:
            self.be.compound_dot(self.W_input_b.T, self.ifog_delta_buffer,
                                 self.out_deltas_buffer_b_v,
                                 alpha=alpha,
                                 beta=beta if self.inputs else 1.0)

        return self.out_deltas_buffer
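
The prev_in_deltas and next_in_deltas lists above are one-step rotations of
the per-step error views: pairing in_deltas_f[t] with in_deltas_f[t - 1] lets
the reversed loop accumulate the recurrent term into the previous step's slot
without index arithmetic, matching the compound_dot calls with beta=1.0. The
wrapped pairing at the first time step writes into the last step's view,
which the reversed loop has already consumed by then. A simplified NumPy
sketch of the trick for a plain recurrence (the LSTM gate handling is
elided):

import numpy as np

T, nout, bsz = 5, 4, 2
W_recur = np.random.random((nout, nout))

# per-step error views, as get_steps would produce them
in_deltas = [np.random.random((nout, bsz)) for _ in range(T)]

# rotate by one so that prev_in_deltas[t] is in_deltas[t - 1]
prev_in_deltas = in_deltas[-1:] + in_deltas[:-1]

for d, prev in reversed(list(zip(in_deltas, prev_in_deltas))):
    # accumulate W_recur.T . d into the previous step's delta view,
    # i.e. compound_dot(W_recur.T, d, prev, beta=1.0) in backend terms
    prev += W_recur.T.dot(d)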
Example #11
    def allocate_deltas(self, global_deltas=None):
        super(Encoder, self).allocate_deltas(global_deltas=global_deltas)

        self.error_buf = self.be.iobuf(self.out_shape)
        self.error_slices = get_steps(self.error_buf, self.out_shape)