# Imports assumed by these tests (module paths follow the neon library layout;
# `con` is an alias for numpy.concatenate and `allclose_with_out` is the
# comparison helper from the test suite's utils module).
import numpy as np
from numpy import concatenate as con

from neon import NervanaObject
from neon.initializers.initializer import GlorotUniform
from neon.layers.layer import LookupTable
from neon.layers.recurrent import BiRNN, BiLSTM, BiBNRNN, Recurrent
from neon.transforms import Rectlinclip, Logistic, Tanh
from utils import allclose_with_out


def test_biRNN_bprop(backend_default, fargs, deltas_buffer):
    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    birnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    birnn.set_deltas(deltas_buffer)

    # tie the backward-direction weights to the forward-direction weights
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # set up a unidirectional Recurrent layer with the same configuration
    # (allocated alongside the BiRNN; not compared directly in this test)
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    rnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    rnn.set_deltas(deltas_buffer)

    # inputs and views: lr is the original sequence, rl the time-reversed copy
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # bprop deltas for the original and the reversed input
    out_lr_g = birnn.fprop(inp_lr)
    del_lr = birnn.bprop(out_lr_g).get().copy()
    birnn.h_buffer[:] = 0
    out_rl_g = birnn.fprop(inp_rl)
    del_rl = birnn.bprop(out_rl_g).get().copy()

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    # with tied weights, reversing the input in time must reverse the deltas
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
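

# These tests rely on a `get_steps` helper that is not shown in this excerpt.
# A minimal sketch of what it is assumed to do, reconstructed from how the
# tests use it: split a flattened (feature_size, seq_len * batch_size) array
# into a list of per-time-step (feature_size, batch_size) views.
def get_steps(x, shape):
    """Return a list of shape[1] views, each (feature_size, batch_size)."""
    steps = shape[1]
    xs = x.reshape(shape + (-1,))
    return [xs[:, step, :] for step in range(steps)]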


def test_lookuptable_ones_error(backend_default, basic_linargs):
    # with an all-ones error signal, the gradient row for each word id must
    # equal its occurrence count
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    # numpy.random.random_integers is deprecated; randint(0, vocab_size) draws
    # from the same inclusive range [0, vocab_size - 1]
    inp = np.random.randint(0, vocab_size, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.ones((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    for wrd_id, cnt in zip(unqidx, count):
        # each occurrence contributes one all-ones column, so the expected
        # gradient row is cnt copies of err's (constant) first column
        dw_exp = err[:, 0] * cnt
        assert np.all(dw_exp == dw[wrd_id, :])
    return
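

# What the fprop assertion above checks, restated in plain numpy (a sketch,
# not the neon implementation): a lookup-table forward pass is a row gather
# from the weight matrix, transposed into the (embedding_dim, tokens) layout
# neon uses for activations.
def _np_lookup_fprop(W, indices):
    """W: (vocab_size, embedding_dim); indices: flat int array of token ids."""
    return W[indices].T  # -> (embedding_dim, len(indices))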


def test_lookuptable_zeros_error(backend_default, basic_linargs):
    # basic sanity check: a zero error signal must produce a zero gradient
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    inp = np.random.randint(0, vocab_size, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.zeros((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return


def test_biLSTM_bprop(backend_default, fargs):
    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional lstm
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(), activation=Tanh(),
                    init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()
    bilstm.set_deltas([bilstm.be.iobuf(bilstm.in_shape)])

    # tie the backward-direction weights to the forward-direction weights
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs and views: lr is the original sequence, rl the time-reversed copy
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs and deltas for the original and the reversed input
    out_lr_g = bilstm.fprop(inp_lr)
    out_lr = out_lr_g.get().copy()
    del_lr = bilstm.bprop(out_lr_g).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl_g = bilstm.fprop(inp_rl)
    out_rl = out_rl_g.get().copy()
    del_rl = bilstm.bprop(out_rl_g).get().copy()

    # views: forward-direction and backward-direction output halves
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts: reversing the input swaps the two output directions
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    # ... and reverses the input deltas in time
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)


def test_biLSTM_fprop(backend_default, fargs):
    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional lstm
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(), init=init_glorot,
                    activation=Tanh(), reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # same weight for both directions
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views: forward-direction and backward-direction output halves
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts: reversing the input swaps the two output directions
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)


def test_biRNN_fprop(backend_default, fargs):
    # basic sanity check with tied weights and random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Rectlinclip(slope=0), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # same weight for both directions
    nout = hidden_size
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr = birnn.fprop(inp_lr).get().copy()
    birnn.h_buffer[:] = 0
    out_rl = birnn.fprop(inp_rl).get().copy()

    # views: forward-direction and backward-direction output halves
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts: reversing the input swaps the two output directions
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)
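

# Why the bidirectional tests above can compare swapped output halves: with
# zero initial state and the backward weights tied to the forward weights,
# running the layer on a time-reversed sequence performs the same computation
# with the two directions exchanged. A self-contained numpy sketch of that
# argument (hypothetical reference code, not the neon implementation):
def _np_birnn_ref(x_steps, W_in, W_rec, b, act):
    """x_steps: list of (input_size, batch) arrays; returns per-step forward
    and backward hidden states, both in original time order."""
    h0 = np.zeros((W_rec.shape[0], x_steps[0].shape[1]))
    h_f, h = [], h0
    for x in x_steps:            # left-to-right direction
        h = act(W_in.dot(x) + W_rec.dot(h) + b)
        h_f.append(h)
    h_b, h = [], h0
    for x in reversed(x_steps):  # right-to-left direction
        h = act(W_in.dot(x) + W_rec.dot(h) + b)
        h_b.append(h)
    return h_f, list(reversed(h_b))


def _check_birnn_flip_symmetry():
    """Usage example: flipping the input in time swaps the two directions."""
    act = lambda z: np.maximum(z, 0.0)
    rng = np.random.RandomState(0)
    steps = [rng.rand(4, 3) for _ in range(5)]
    W_in, W_rec, b = rng.rand(6, 4), 0.1 * rng.rand(6, 6), rng.rand(6, 1)
    f_lr, b_lr = _np_birnn_ref(steps, W_in, W_rec, b, act)
    f_rl, b_rl = _np_birnn_ref(list(reversed(steps)), W_in, W_rec, b, act)
    b_rl_rev = list(reversed(b_rl))
    f_rl_rev = list(reversed(f_rl))
    for t in range(len(steps)):
        assert np.allclose(f_lr[t], b_rl_rev[t])
        assert np.allclose(b_lr[t], f_rl_rev[t])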


def test_glorot(backend, args):
    # Glorot Uniform scales its range by the tensor's fan-in and fan-out, so
    # the sample mean of a large tensor is expected (almost surely) to sit much
    # closer to zero than that of a tiny one
    be = NervanaObject.be
    shape_1 = (1, 2)
    shape_2 = (1000, 10000)

    Wdev_1 = be.empty(shape_1)
    Wdev_2 = be.empty(shape_2)

    glorot_init = GlorotUniform()
    glorot_init.fill(Wdev_1)
    glorot_init.fill(Wdev_2)

    Whost_1 = Wdev_1.get()
    Whost_2 = Wdev_2.get()

    mean_1 = np.mean(Whost_1)
    mean_2 = np.mean(Whost_2)

    assert np.abs(mean_1) > np.abs(mean_2)
    return
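

# For reference, the property test_glorot leans on: the standard Glorot/Xavier
# uniform rule draws weights from [-k, k] with k = sqrt(6 / (fan_in + fan_out)),
# so larger layers get both a tighter range and many more samples, pulling the
# sample mean toward zero. A sketch of that rule in plain numpy (the exact
# fan convention neon applies to a given tensor shape is an assumption here):
def _np_glorot_uniform(fan_in, fan_out, rng=np.random):
    k = np.sqrt(6.0 / (fan_in + fan_out))
    return rng.uniform(-k, k, size=(fan_in, fan_out))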


def test_lookuptable_rand_error(backend_default, basic_linargs, deltas_buffer):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    inp = np.random.randint(0, vocab_size, size=nin * batch_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.random.random((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    for wrd_id, cnt in zip(unqidx, count):
        # accumulate the error columns belonging to this word id
        dw_exp[:] = 0
        cnt_exp = 0
        for i, w_id in enumerate(inp):
            if w_id == wrd_id:
                dw_exp[:] = dw_exp[:] + err[:, i]
                cnt_exp += 1
        assert np.allclose(dw[wrd_id, :], dw_exp, atol=0, rtol=1e-4)
        assert cnt == cnt_exp
    return
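

# The reference loop above is O(vocab * tokens); the same expected gradient can
# be written as a single scatter-add (a numpy sketch of the lookup-table
# backward pass, hypothetical reference code rather than neon's kernel):
def _np_lookup_bprop(err, indices, vocab_size):
    """err: (embedding_dim, tokens); returns (vocab_size, embedding_dim)."""
    dW = np.zeros((vocab_size, err.shape[0]), dtype=err.dtype)
    np.add.at(dW, indices, err.T)  # rows of err.T accumulate into dW[indices]
    return dW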


def test_bibn(backend_default, fargs):
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn with batch norm
    init_glorot = GlorotUniform()
    birnn = BiBNRNN(hidden_size, activation=Logistic(), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # test fprop

    # set the ff buffer
    inp_np = np.random.random(birnn.h_ff_buffer.shape)
    inp_be = birnn.be.array(inp_np)
    birnn.h_ff_buffer[:] = inp_np

    # compare the bn output with calling the backend bn directly
    xsum = birnn.be.zeros_like(birnn.xmean)
    xvar = birnn.be.zeros_like(birnn.xvar)
    gmean = birnn.be.zeros_like(birnn.gmean)
    gvar = birnn.be.zeros_like(birnn.gvar)
    gamma = birnn.be.ones(birnn.gamma.shape)
    beta = birnn.be.zeros_like(birnn.beta)
    grad_gamma = birnn.be.zeros_like(gamma)
    grad_beta = birnn.be.zeros_like(beta)
    out_ref = birnn.be.zeros_like(birnn.h_ff_buffer)

    xsum[:] = birnn.be.sum(birnn.h_ff_buffer, axis=1)
    birnn.be.compound_fprop_bn(birnn.h_ff_buffer, xsum, xvar, gmean, gvar,
                               gamma, beta, out_ref, birnn.eps, birnn.rho,
                               accumbeta=0, relu=False)

    # call the bibnrnn layer fprop_bn
    out_bn = birnn._fprop_bn(birnn.h_ff_buffer, inference=False)

    assert allclose_with_out(out_bn.get(), out_ref.get(), rtol=0.0, atol=1.0e-5)

    # test bprop
    err_np = np.random.random(birnn.h_ff_buffer.shape)
    err_be = birnn.be.array(err_np)

    err_out_ref = birnn.be.empty_like(err_be)
    birnn.be.compound_bprop_bn(err_out_ref, grad_gamma, grad_beta, err_be,
                               inp_be, xsum, xvar, gamma, birnn.eps)

    err_out_bn = birnn._bprop_bn(err_be, out_bn)

    assert allclose_with_out(err_out_bn.get(), err_out_ref.get(),
                             rtol=0.0, atol=1.0e-5)
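

# For context, a plain-numpy statement of the batch-norm forward math the
# backend call above is assumed to implement (standard BN over the second
# axis, which is why xsum is reduced with axis=1; this is a sketch, not the
# signature-compatible neon kernel):
def _np_bn_fprop(x, gamma, beta, eps):
    """x: (features, N). Normalize each feature row, then scale and shift."""
    mean = x.mean(axis=1, keepdims=True)
    var = x.var(axis=1, keepdims=True)
    xhat = (x - mean) / np.sqrt(var + eps)
    return gamma * xhat + beta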