def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon GRU instance
    gru = GRU(hidden_size, init=Gaussian(), activation=Tanh(), gate_activation=Logistic())
    inpa = gru.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    gru.configure((input_size, seq_len))
    gru.prev_layer = True
    gru.allocate()
    gru.set_deltas([gru.be.iobuf(gru.in_shape)])
    out_bl = gru.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run backprop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = gru.bprop(gru.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_gru(gru)
        gru.allocate()
        out_pos = gru.fprop(gru.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_gru(gru)
        gru.allocate()
        out_neg = gru.fprop(gru.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)

        # compute the gradient estimate
        grad = 0.5 / float(epsilon) * (loss_pos - loss_neg)
        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del gru
    return (grads_est, deltas_neon)
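# --- Hedged usage sketch (not part of the original test file) ---
# gradient_calc returns a finite-difference estimate of the input gradients and
# the analytic deltas from bprop; a driver would compare the two. The sizes,
# epsilon, seed, and tolerance below are illustrative assumptions only.
def gradient_calc_example(backend_default):
    np.random.seed(0)  # assumed seed for reproducibility
    grads_est, deltas_neon = gradient_calc(seq_len=3, input_size=4, hidden_size=5,
                                           batch_size=2, epsilon=1.0e-5)
    # tolerance is an assumption; adjust for backend precision
    assert allclose_with_out(grads_est, deltas_neon, rtol=0.0, atol=1.0e-4)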
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()

    # separate layer_list into two: the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    # compare strings with ==, not `is`
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'

    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
                      onehot_input=False)

# weight initialization
init = Uniform(low=-0.1, high=0.1)

# model initialization
rlayer_params = {"output_size": hidden_size, "init": init,
                 "activation": Tanh(), "gate_activation": Logistic()}
if args.rlayer_type == 'lstm':
    rlayer1, rlayer2 = LSTM(**rlayer_params), LSTM(**rlayer_params)
else:
    rlayer1, rlayer2 = GRU(**rlayer_params), GRU(**rlayer_params)

layers = [
    LookupTable(vocab_size=len(train_set.vocab), embedding_dim=hidden_size, init=init),
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model = Model(layers=layers)

# vanilla gradient descent with decay schedule on learning rate and gradient scaling
learning_rate_sched = Schedule(list(range(5, args.epochs)), .5)
optimizer = GradientDescentMomentum(1,
gradient_clip_value = 5

# load data and parse on character-level
ticker_task = CopyTask(seq_len_max, vec_size)
train_set = Ticker(ticker_task)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

output_size = 8
N = 120  # number of memory locations
M = 8    # size of a memory location

# model initialization
layers = [
    GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic()),
    Affine(train_set.nout, init, bias=init, activation=Logistic())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyBinary())

model = Model(layers=layers)

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)

# we can use the training set as the validation set,
# since the data is synthetically generated
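# --- Hedged sketch: assumed continuation of this example, not verbatim source ---
# with the cost, model, optimizer, and callbacks defined above, training would
# typically be launched through Model.fit; num_epochs comes from args as in the
# rest of the snippet.
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)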
# weight initialization
init = Uniform(low=-0.08, high=0.08)

# conditional recurrent autoencoder model
num_layers = 1
encoder, decoder = [], []

# decoder_connections indicates the encoder layer indices to receive conditional inputs from
decoder_connections = []
for ii in range(num_layers):
    name = "GRU" + str(ii + 1)
    encoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name=name + "Enc"))
    decoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name=name + "Dec"))
    decoder_connections.append(ii)

decoder.append(Affine(train_set.nout, init, bias=init, activation=Softmax(),
def check_gru(seq_len, input_size, hidden_size, batch_size, init_func,
              inp_moms=[0.0, 1.0], add_init_state=False):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    slice_shape = (hidden_size, batch_size)

    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon GRU
    gru = GRU(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic())

    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inp_dev = gru.be.array(inp)
    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # run neon fprop
    gru.configure((input_size, seq_len))
    gru.prev_layer = True
    gru.allocate()

    test_buffer = DeltasTree()
    gru.allocate_deltas(test_buffer)
    test_buffer.allocate_buffers()
    gru.set_deltas(test_buffer)

    if add_init_state:
        init_state = np.random.rand(*slice_shape) * inp_moms[1] + inp_moms[0]
        init_state_dev = gru.be.array(init_state)
        gru.fprop(inp_dev, init_state=init_state_dev)
    else:
        gru.fprop(inp_dev)

    # reference numpy GRU
    gru_ref = RefGRU(input_size, hidden_size)
    WGRU = gru_ref.weights

    # make ref weights and biases the same with neon model
    r_range = list(range(hidden_size))
    z_range = list(range(hidden_size, hidden_size * 2))
    c_range = list(range(hidden_size * 2, hidden_size * 3))

    WGRU[gru_ref.weights_ind_br][:] = gru.b.get()[r_range]
    WGRU[gru_ref.weights_ind_bz][:] = gru.b.get()[z_range]
    WGRU[gru_ref.weights_ind_bc][:] = gru.b.get()[c_range]

    WGRU[gru_ref.weights_ind_Wxr][:] = gru.W_input.get()[r_range]
    WGRU[gru_ref.weights_ind_Wxz][:] = gru.W_input.get()[z_range]
    WGRU[gru_ref.weights_ind_Wxc][:] = gru.W_input.get()[c_range]

    WGRU[gru_ref.weights_ind_Rhr][:] = gru.W_recur.get()[r_range]
    WGRU[gru_ref.weights_ind_Rhz][:] = gru.W_recur.get()[z_range]
    WGRU[gru_ref.weights_ind_Rhc][:] = gru.W_recur.get()[c_range]

    # transpose input X and do fprop
    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(seq_len, batch_size, input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size, hidden_size).swapaxes(1, 2)

    if add_init_state:
        init_state_ref = init_state.copy()
        (dWGRU_ref, h_ref_list, dh_ref_list,
         dr_ref_list, dz_ref_list, dc_ref_list) = gru_ref.lossFun(inp_ref,
                                                                  deltas_ref,
                                                                  init_state_ref)
    else:
        (dWGRU_ref, h_ref_list, dh_ref_list,
         dr_ref_list, dz_ref_list, dc_ref_list) = gru_ref.lossFun(inp_ref, deltas_ref)

    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(gru.outputs.get(), h_ref_list, rtol=0.0, atol=1.0e-5)
    neon_logger.display('fprop is verified')

    # now test the bprop
    neon_logger.display('Making sure neon GRU matches numpy GRU in bprop')
    gru.bprop(gru.be.array(deltas))
    # grab the delta W from the gradient buffer
    dWinput_neon = gru.dW_input.get()
    dWrecur_neon = gru.dW_recur.get()
    db_neon = gru.db.get()
    dWxr_neon = dWinput_neon[r_range]
    dWxz_neon = dWinput_neon[z_range]
    dWxc_neon = dWinput_neon[c_range]
    dWrr_neon = dWrecur_neon[r_range]
    dWrz_neon = dWrecur_neon[z_range]
    dWrc_neon = dWrecur_neon[c_range]
    dbr_neon = db_neon[r_range]
    dbz_neon = db_neon[z_range]
    dbc_neon = db_neon[c_range]

    drzc_neon = gru.rzhcan_delta_buffer.get()
    dr_neon = drzc_neon[r_range]
    dz_neon = drzc_neon[z_range]
    dc_neon = drzc_neon[c_range]

    dWxr_ref = dWGRU_ref[gru_ref.dW_ind_Wxr]
    dWxz_ref = dWGRU_ref[gru_ref.dW_ind_Wxz]
    dWxc_ref = dWGRU_ref[gru_ref.dW_ind_Wxc]
    dWrr_ref = dWGRU_ref[gru_ref.dW_ind_Rhr]
    dWrz_ref = dWGRU_ref[gru_ref.dW_ind_Rhz]
    dWrc_ref = dWGRU_ref[gru_ref.dW_ind_Rhc]
    dbr_ref = dWGRU_ref[gru_ref.dW_ind_br]
    dbz_ref = dWGRU_ref[gru_ref.dW_ind_bz]
    dbc_ref = dWGRU_ref[gru_ref.dW_ind_bc]

    # neon_logger.display('====Verifying hidden deltas ====')
    neon_logger.display('====Verifying r deltas ====')
    assert allclose_with_out(dr_neon, dr_ref_list, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying z deltas ====')
    assert allclose_with_out(dz_neon, dz_ref_list, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying hcan deltas ====')
    assert allclose_with_out(dc_neon, dc_ref_list, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying update on W_input====')
    neon_logger.display('dWxr')
    assert allclose_with_out(dWxr_neon, dWxr_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dWxz')
    assert allclose_with_out(dWxz_neon, dWxz_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dWxc')
    assert allclose_with_out(dWxc_neon, dWxc_ref, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying update on W_recur====')
    neon_logger.display('dWrr')
    assert allclose_with_out(dWrr_neon, dWrr_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dWrz')
    assert allclose_with_out(dWrz_neon, dWrz_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dWrc')
    assert allclose_with_out(dWrc_neon, dWrc_ref, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying update on bias====')
    neon_logger.display('dbr')
    assert allclose_with_out(dbr_neon, dbr_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dbz')
    assert allclose_with_out(dbz_neon, dbz_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dbc')
    assert allclose_with_out(dbc_neon, dbc_ref, rtol=0.0, atol=1.0e-5)

    neon_logger.display('bprop is verified')

    return
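# --- Hedged usage sketch (assumed, not from the original test) ---
# check_gru would normally be driven from a pytest case, often parametrized over
# sizes and initializers; the concrete values and seed below are illustrative
# assumptions only.
def test_gru_example(backend_default):
    np.random.seed(0)  # assumed seed for reproducibility
    check_gru(seq_len=5, input_size=3, hidden_size=4, batch_size=2,
              init_func=Gaussian(), add_init_state=True)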
train_set = TextNMT(time_steps, train_path, get_prev_target=True, onehot_input=False,
                    split='train', dataset=dataset, subset_pct=args.subset_pct)
valid_set = TextNMT(time_steps, train_path, get_prev_target=False, onehot_input=False,
                    split='valid', dataset=dataset)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# Standard or Conditional encoder / decoder:
encoder = [LookupTable(vocab_size=len(train_set.s_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_en")]
decoder = [LookupTable(vocab_size=len(train_set.t_vocab), embedding_dim=embedding_dim,
                       init=init, name="LUT_de")]
decoder_connections = []

# link up recurrent layers
for ii in range(num_layers):
    encoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Enc"))
    decoder.append(GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=True, name="GRU1Dec"))
    decoder_connections.append(ii)

decoder.append(Affine(train_set.nout, init, bias=init, activation=Softmax(),
                      name="Affout"))

layers = Seq2Seq([encoder, decoder],
                 decoder_connections=decoder_connections,
                 name="Seq2Seq")

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)
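# --- Hedged sketch: assumed continuation of this example, not verbatim source ---
# the usual neon pattern is to attach callbacks (with the validation set for
# periodic evaluation) and then call Model.fit; argument names follow the rest
# of the snippet.
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)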
# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
if args.rlayer_type == 'lstm':
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
else:
    rlayer1 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())
    rlayer2 = GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic())

layers = [
    rlayer1,
    rlayer2,
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

model = Model(layers=layers)
def test_conv_rnn(backend_default):
    train_shape = (1, 17, 142)

    be = backend_default
    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    delta = be.array(be.rng.randn(10, be.bsz))

    init_norm = Gaussian(loc=0.0, scale=0.01)
    bilstm = DeepBiLSTM(128, init_norm, activation=Rectlin(),
                        gate_activation=Rectlin(), depth=1, reset_cells=True)
    birnn_1 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=False)
    birnn_2 = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=2, reset_cells=True, batch_norm=False)
    bibnrnn = DeepBiRNN(128, init_norm, activation=Rectlin(),
                        depth=1, reset_cells=True, batch_norm=True)
    birnnsum = DeepBiRNN(128, init_norm, activation=Rectlin(),
                         depth=1, reset_cells=True, batch_norm=False, bi_sum=True)
    rnn = Recurrent(128, init=init_norm, activation=Rectlin(), reset_cells=True)
    lstm = LSTM(128, init_norm, activation=Rectlin(),
                gate_activation=Rectlin(), reset_cells=True)
    gru = GRU(128, init_norm, activation=Rectlin(),
              gate_activation=Rectlin(), reset_cells=True)

    rlayers = [bilstm, birnn_1, birnn_2, bibnrnn, birnnsum, rnn, lstm, gru]

    for rl in rlayers:
        layers = [
            Conv((2, 2, 4), init=init_norm, activation=Rectlin(),
                 strides=dict(str_h=2, str_w=4)),
            Pooling(2, strides=2),
            Conv((3, 3, 4), init=init_norm, batch_norm=True, activation=Rectlin(),
                 strides=dict(str_h=1, str_w=2)),
            rl,
            RecurrentMean(),
            Affine(nout=10, init=init_norm, activation=Rectlin()),
        ]
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        model.fprop(inp)
        model.bprop(delta)
def check_gru(seq_len, input_size, hidden_size, batch_size, init_func,
              inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon GRU
    gru = GRU(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic())

    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = gru.be.array(inp)
    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # run neon fprop
    gru.fprop(inpa)

    # reference numpy GRU
    gru_ref = RefGRU(input_size, hidden_size)
    WGRU = gru_ref.weights

    # make ref weights and biases the same with neon model
    r_range = range(hidden_size)
    z_range = range(hidden_size, hidden_size * 2)
    c_range = range(hidden_size * 2, hidden_size * 3)

    WGRU[gru_ref.weights_ind_br][:] = gru.b.get()[r_range]
    WGRU[gru_ref.weights_ind_bz][:] = gru.b.get()[z_range]
    WGRU[gru_ref.weights_ind_bc][:] = gru.b.get()[c_range]

    WGRU[gru_ref.weights_ind_Wxr][:] = gru.W_input.get()[r_range]
    WGRU[gru_ref.weights_ind_Wxz][:] = gru.W_input.get()[z_range]
    WGRU[gru_ref.weights_ind_Wxc][:] = gru.W_input.get()[c_range]

    WGRU[gru_ref.weights_ind_Rhr][:] = gru.W_recur.get()[r_range]
    WGRU[gru_ref.weights_ind_Rhz][:] = gru.W_recur.get()[z_range]
    WGRU[gru_ref.weights_ind_Rhc][:] = gru.W_recur.get()[c_range]

    # transpose input X and do fprop
    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(
        seq_len, batch_size, input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(
        seq_len, batch_size, hidden_size).swapaxes(1, 2)

    (dWGRU_ref, h_ref_list, dh_ref_list,
     dr_ref_list, dz_ref_list, dc_ref_list) = gru_ref.lossFun(inp_ref, deltas_ref)

    print('====Verifying hidden states====')
    print(allclose_with_out(gru.h_buffer.get(), h_ref_list, rtol=0.0, atol=1.0e-5))
    print('fprop is verified')

    # now test the bprop
    print('Making sure neon GRU matches numpy GRU in bprop')
    gru.bprop(gru.be.array(deltas))
    # grab the delta W from the gradient buffer
    dWinput_neon = gru.dW_input.get()
    dWrecur_neon = gru.dW_recur.get()
    db_neon = gru.db.get()
    dWxr_neon = dWinput_neon[r_range]
    dWxz_neon = dWinput_neon[z_range]
    dWxc_neon = dWinput_neon[c_range]
    dWrr_neon = dWrecur_neon[r_range]
    dWrz_neon = dWrecur_neon[z_range]
    dWrc_neon = dWrecur_neon[c_range]
    dbr_neon = db_neon[r_range]
    dbz_neon = db_neon[z_range]
    dbc_neon = db_neon[c_range]

    drzc_neon = gru.rzhcan_delta_buffer.get()
    dr_neon = drzc_neon[r_range]
    dz_neon = drzc_neon[z_range]
    dc_neon = drzc_neon[c_range]

    dWxr_ref = dWGRU_ref[gru_ref.dW_ind_Wxr]
    dWxz_ref = dWGRU_ref[gru_ref.dW_ind_Wxz]
    dWxc_ref = dWGRU_ref[gru_ref.dW_ind_Wxc]
    dWrr_ref = dWGRU_ref[gru_ref.dW_ind_Rhr]
    dWrz_ref = dWGRU_ref[gru_ref.dW_ind_Rhz]
    dWrc_ref = dWGRU_ref[gru_ref.dW_ind_Rhc]
    dbr_ref = dWGRU_ref[gru_ref.dW_ind_br]
    dbz_ref = dWGRU_ref[gru_ref.dW_ind_bz]
    dbc_ref = dWGRU_ref[gru_ref.dW_ind_bc]

    # print('====Verifying hidden deltas ====')
    print('====Verifying r deltas ====')
    assert allclose_with_out(dr_neon, dr_ref_list, rtol=0.0, atol=1.0e-5)

    print('====Verifying z deltas ====')
    assert allclose_with_out(dz_neon, dz_ref_list, rtol=0.0, atol=1.0e-5)

    print('====Verifying hcan deltas ====')
    assert allclose_with_out(dc_neon, dc_ref_list, rtol=0.0, atol=1.0e-5)

    print('====Verifying update on W_input====')
    print('dWxr')
    assert allclose_with_out(dWxr_neon, dWxr_ref, rtol=0.0, atol=1.0e-5)
    print('dWxz')
    assert allclose_with_out(dWxz_neon, dWxz_ref, rtol=0.0, atol=1.0e-5)
    print('dWxc')
    assert allclose_with_out(dWxc_neon, dWxc_ref, rtol=0.0, atol=1.0e-5)

    print('====Verifying update on W_recur====')
    print('dWrr')
    assert allclose_with_out(dWrr_neon, dWrr_ref, rtol=0.0, atol=1.0e-5)
    print('dWrz')
    assert allclose_with_out(dWrz_neon, dWrz_ref, rtol=0.0, atol=1.0e-5)
    print('dWrc')
    assert allclose_with_out(dWrc_neon, dWrc_ref, rtol=0.0, atol=1.0e-5)

    print('====Verifying update on bias====')
    print('dbr')
    assert allclose_with_out(dbr_neon, dbr_ref, rtol=0.0, atol=1.0e-5)
    print('dbz')
    assert allclose_with_out(dbz_neon, dbz_ref, rtol=0.0, atol=1.0e-5)
    print('dbc')
    assert allclose_with_out(dbc_neon, dbc_ref, rtol=0.0, atol=1.0e-5)

    print('bprop is verified')

    return