def test_linear_zeros(backend_default, basic_linargs):
    # basic sanity check with 0 weights, random inputs
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=0.0, high=0.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.random.random((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(inp).get()

    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = dtypeu(np.zeros((nout, batch_size)))
    deltas = layer.bprop(layer.be.array(err)).asnumpyarray()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = layer.dW.asnumpyarray()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_linear_ones(backend_default, basic_linargs):
    # basic sanity check with all ones on the inputs and weights;
    # check that each row in the output is the sum of the weights for
    # that output. this check will confirm that the correct number of
    # operations is being run
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=1.0, high=1.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.ones((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(inp).get()
    w = layer.W.get()
    sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))

    # for larger layers need to estimate numerical precision
    # atol = est_mm_prec(w, inp.get())
    assert np.allclose(sums, out, atol=0.0, rtol=0.0), \
        '%e' % np.max(np.abs(out - sums))
    return
def test_linear_ones(backend_default, basic_linargs):
    # basic sanity check with all ones on the inputs and weights;
    # check that each row in the output is the sum of the weights for
    # that output. this check will confirm that the correct number of
    # operations is being run
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=1.0, high=1.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.ones((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(inp).asnumpyarray()
    w = layer.W.asnumpyarray()
    sums = np.sum(w, 1).reshape((nout, 1)) * np.ones((1, batch_size))

    # for larger layers need to estimate numerical precision
    # atol = est_mm_prec(w, inp.asnumpyarray())
    assert np.allclose(sums, out, atol=0.0, rtol=0.0), \
        '%e' % np.max(np.abs(out - sums))
    return
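# A minimal NumPy-only sketch of the identity the ones-tests above rely on:
# with an all-ones input batch, each output row of W.dot(inp) equals the
# row-sum of W broadcast across the batch. The helper name and sizes below
# are illustrative only, not part of the test suite.
def _demo_row_sum_identity(nin=8, nout=4, batch_size=3):
    w_demo = np.random.random((nout, nin)).astype(np.float32)
    ones_inp = np.ones((nin, batch_size), dtype=np.float32)
    out_demo = np.dot(w_demo, ones_inp)
    sums_demo = np.sum(w_demo, axis=1).reshape((nout, 1)) * np.ones((1, batch_size))
    assert np.allclose(out_demo, sums_demo)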
def test_all_rand(backend_default, allrand_args):
    # test with random weights and random inputs
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    inp_rng = [0.0, rngmax]
    nin = 1024
    nout = 2048
    batch_size = 16
    NervanaObject.be.bsz = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layer = Linear(nout=nout, init=init_unif)
    inp = np.random.random((nin, batch_size))
    inp *= inp_rng[1] - inp_rng[0]
    inp += inp_rng[0]
    inp = inp.astype(dtypeu)
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(layer.be.array(inp)).asnumpyarray()
    w = layer.W.asnumpyarray()

    # the expected output using numpy
    out_exp = np.dot(w, inp)

    # for larger layers need to estimate numerical precision
    atol = 2 * est_mm_prec(w, inp, ntrials=1)
    assert np.allclose(out_exp, out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(out - out_exp)), atol)

    err = np.random.random((nout, batch_size))
    err = err * (inp_rng[1] - inp_rng[0]) + inp_rng[0]
    err = err.astype(dtypeu)
    deltas = layer.bprop(layer.be.array(err)).asnumpyarray()
    dw = layer.dW.asnumpyarray()

    deltas_exp = np.dot(w.T, err)
    atol = 2 * est_mm_prec(w.T, err, ntrials=1)
    assert np.allclose(deltas_exp, deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(deltas_exp - deltas)), atol)

    dw_exp = np.dot(err, inp.T)
    atol = 2 * est_mm_prec(err, inp.T, ntrials=1)
    assert np.allclose(dw_exp, dw, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(dw_exp - dw)), atol)
    return
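# est_mm_prec is used above but not defined in this excerpt. A plausible
# minimal sketch of such a helper (an assumption about its behavior, not the
# library's actual implementation): estimate the absolute error of a float32
# matrix multiply by comparing against a float64 reference and keeping the
# worst deviation over ntrials runs. Named with a leading underscore to avoid
# shadowing the real helper.
def _est_mm_prec_sketch(a, b, ntrials=1):
    worst = 0.0
    for _ in range(ntrials):
        ref = np.dot(a.astype(np.float64), b.astype(np.float64))
        lowp = np.dot(a.astype(np.float32), b.astype(np.float32))
        worst = max(worst, float(np.max(np.abs(ref - lowp.astype(np.float64)))))
    return worst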
def test_concat_sequence_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Linear(nout=nout, init=init_unif) for _ in range(2)]
    inputs = [layers[0].be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeConcatSequence(layers)
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    out = merge.fprop(inputs).asnumpyarray()

    weights = [layer.W.asnumpyarray() for layer in layers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)],
                             axis=1)
    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step))) for step in steps]
    err_concat = layers[0].be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T)
                  for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(layers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
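# A NumPy-only sketch of what the concat merge above is expected to compute
# (illustrative, not the MergeConcatSequence implementation): forward
# concatenates the per-branch outputs along the column (sequence) axis, and
# backward slices the merged error back out so each branch's weight gradient
# only sees its own columns.
def _concat_merge_reference(weights, inputs):
    outs = [np.dot(w, x) for w, x in zip(weights, inputs)]
    return np.concatenate(outs, axis=1)

def _concat_merge_grads(err_concat, inputs):
    # split err along columns to match each branch's output width
    widths = np.cumsum([x.shape[1] for x in inputs])[:-1]
    errs = np.split(err_concat, widths, axis=1)
    return [np.dot(e, x.T) for e, x in zip(errs, inputs)]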
def test_sum_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with sum
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 64]
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Linear(nout=nout, init=init_unif) for nout in nouts]
    inputs = [layers[0].be.array(dtypeu(np.random.random((nin, batch_size))))
              for nin in nins]
    merge = MergeSum(layers)
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    out = merge.fprop(inputs).asnumpyarray()

    weights = [layer.W.asnumpyarray() for layer in layers]
    out_exp = sum([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])
    assert np.allclose(out, out_exp, atol=1e-3)

    err = dtypeu(np.random.random((nouts[0], batch_size)))
    merge.bprop(layers[0].be.array(err))
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T) for inp in inputs]

    for layer, dW_exp in zip(layers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
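# Similarly, a hypothetical NumPy reference for the sum merge (not the
# MergeSum implementation): forward adds the branch outputs elementwise
# (all branches share nout), and backward passes the same error to every
# branch, so each branch's gradient is dW_i = err.dot(x_i.T).
def _sum_merge_reference(weights, inputs, err):
    out = sum(np.dot(w, x) for w, x in zip(weights, inputs))
    dWs = [np.dot(err, x.T) for x in inputs]
    return out, dWs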
def test_all_rand(backend, allrand_args):
    # test with random weights and random inputs
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    inp_rng = [0.0, rngmax]
    nin = 1024
    nout = 2048
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layer = Linear(nout=nout, init=init_unif)
    inp = np.random.random((nin, batch_size))
    inp *= inp_rng[1] - inp_rng[0]
    inp += inp_rng[0]
    inp = inp.astype(dtypeu)
    out = layer.fprop(layer.be.array(inp)).asnumpyarray()
    w = layer.W.asnumpyarray()

    # the expected output using numpy
    out_exp = np.dot(w, inp)

    # for larger layers need to estimate numerical precision
    atol = 2 * est_mm_prec(w, inp, ntrials=1)
    assert np.allclose(out_exp, out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(out - out_exp)), atol)

    err = np.random.random((nout, batch_size))
    err = err * (inp_rng[1] - inp_rng[0]) + inp_rng[0]
    err = err.astype(dtypeu)
    deltas = layer.bprop(layer.be.array(err)).asnumpyarray()
    dw = layer.dW.asnumpyarray()

    deltas_exp = np.dot(w.T, err)
    atol = 2 * est_mm_prec(w.T, err, ntrials=1)
    assert np.allclose(deltas_exp, deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(deltas_exp - deltas)), atol)

    dw_exp = np.dot(err, inp.T)
    atol = 2 * est_mm_prec(err, inp.T, ntrials=1)
    assert np.allclose(dw_exp, dw, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(dw_exp - dw)), atol)
    return
def test_linear_zeros(backend, basic_linargs):
    # basic sanity check with 0 weights, random inputs
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=0.0, high=0.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.random.random((nin, batch_size))))
    out = layer.fprop(inp).get()

    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = dtypeu(np.zeros((nout, batch_size)))
    deltas = layer.bprop(layer.be.array(err)).asnumpyarray()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = layer.dW.asnumpyarray()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
conv1 = dict(init=init, batch_norm=False, activation=lrelu)
# what about the BatchNorm layer and the batch_norm parameter?
conv2 = dict(init=init, batch_norm=True, activation=lrelu, padding=2)
conv3 = dict(init=init, batch_norm=True, activation=lrelu, padding=1)
D_layers = [
    Conv((5, 5, 5, 32), **conv1),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv3),
    Dropout(keep=0.8),
    Pooling((2, 2, 2)),
    # what about the Flatten layer?
    Linear(1, init=init)
]
# what about the activation function?

# generator using convolution layers
latent_size = 200
relu = Rectlin(slope=0)  # relu for generator
conv4 = dict(init=init, batch_norm=True, activation=lrelu,
             dilation=dict(dil_h=2, dil_w=2, dil_d=2))
conv5 = dict(init=init, batch_norm=True, activation=lrelu,
             padding=dict(pad_h=2, pad_w=2, pad_d=0),
             dilation=dict(dil_h=2, dil_w=2, dil_d=3))
conv6 = dict(init=init,
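# The fragment above uses `init` and `lrelu`, which are defined outside this
# excerpt (before conv1 in the full script). A hedged sketch of typical
# choices in neon GAN examples; the exact values are assumptions, not
# recovered from the original code:
# init = Gaussian(scale=0.05)    # assumed weight initializer
# lrelu = Rectlin(slope=0.1)     # assumed leaky rectifier for the discriminator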
def test_linear_zeros(backend_default, basic_linargs):
    # basic sanity check with 0 weights, random inputs
    nin, nout, batch_size = basic_linargs
    NervanaObject.be.bsz = batch_size
    dtypeu = np.float32

    init_unif = Uniform(low=0.0, high=0.0)
    layer = Linear(nout=nout, init=init_unif)
    inp = layer.be.array(dtypeu(np.random.random((nin, batch_size))))
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()
    layer.set_deltas([layer.be.iobuf(nin)])
    out = layer.fprop(inp).get()

    assert np.min(out) == 0.0 and np.max(out) == 0.0

    err = dtypeu(np.zeros((nout, batch_size)))
    deltas = layer.bprop(layer.be.array(err)).get()
    assert np.min(deltas) == 0.0 and np.max(deltas) == 0.0

    dw = layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0
    return
def test_all_rand(backend_default, allrand_args, deltas_buffer):
    # test with random weights and random inputs
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    inp_rng = [0.0, rngmax]
    nin = 1024
    nout = 2048
    batch_size = 16
    NervanaObject.be.bsz = batch_size

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layer = Linear(nout=nout, init=init_unif)
    inp = np.random.random((nin, batch_size))
    inp *= inp_rng[1] - inp_rng[0]
    inp += inp_rng[0]
    inp = inp.astype(dtypeu)
    layer.configure(nin)
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate()

    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    out = layer.fprop(layer.be.array(inp)).get()
    w = layer.W.get()

    # the expected output using numpy
    out_exp = np.dot(w, inp)

    # for larger layers need to estimate numerical precision
    atol = 2 * est_mm_prec(w, inp, ntrials=1)
    assert allclose_with_out(out_exp, out, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(out - out_exp)), atol)

    err = np.random.random((nout, batch_size))
    err = err * (inp_rng[1] - inp_rng[0]) + inp_rng[0]
    err = err.astype(dtypeu)
    deltas = layer.bprop(layer.be.array(err)).get()
    dw = layer.dW.get()

    deltas_exp = np.dot(w.T, err)
    atol = 2 * est_mm_prec(w.T, err, ntrials=1)
    assert allclose_with_out(deltas_exp, deltas, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(deltas_exp - deltas)), atol)

    dw_exp = np.dot(err, inp.T)
    atol = 2 * est_mm_prec(err, inp.T, ntrials=1)
    assert allclose_with_out(dw_exp, dw, atol=atol, rtol=0.0), \
        '%e %e' % (np.max(np.abs(dw_exp - dw)), atol)
    return
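# allclose_with_out comes from neon's test utilities and is not shown in this
# excerpt. A minimal stand-in sketch (an assumption about its behavior, not
# the actual helper): behave like np.allclose but report the worst deviation
# when the check fails, so test logs carry the magnitude of the error.
def _allclose_with_out_sketch(x, y, atol=1e-8, rtol=1e-5):
    ok = np.allclose(x, y, atol=atol, rtol=rtol)
    if not ok:
        print('max abs diff: %e' % np.max(np.abs(np.asarray(x) - np.asarray(y))))
    return ok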