def test_birnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                     return_sequence, weight_initializer, bias_initializer,
                     init_state, sum_out, concat_out, transformer_factory):
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer, init_state)

    # Compute reference numpy RNN
    rnn_ref = RefBidirectional(input_size, hidden_size, return_sequence=return_sequence,
                               sum_out=sum_out, concat_out=concat_out)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.fwd_rnn.bh.shape))
    h_ref_list = rnn_ref.fprop(input_value.transpose([1, 0, 2]),
                               init_states=init_state_value)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                   reset_cells=True, return_sequence=return_sequence,
                   sum_out=sum_out, concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)
        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if not isinstance(fprop_neon, tuple):
            fprop_neon = [fprop_neon]
            h_ref_list = [h_ref_list]

        for ii, output in enumerate(fprop_neon):
            if return_sequence is True:
                output = output[:, :, 0]
            ng.testing.assert_allclose(output, h_ref_list[ii],
                                       rtol=fprop_rtol, atol=fprop_atol)
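# NOTE: make_placeholder and make_weights are shared helpers from the recurrent
# test utilities and are not defined in this excerpt (nor are the module-level
# rng generator, delta step, and *_rtol/*_atol tolerances). The sketch below
# shows a plausible make_placeholder inferred purely from the call sites here;
# the name, axis layout, and value range are assumptions, and the real helper
# also accepts an extra_axes argument (see test_rnn_fprop).
def make_placeholder_sketch(input_size, sequence_length, batch_size):
    # feature x recurrent x batch placeholder, matching the (C, REC, N) layout
    # the tests rely on (e.g. when they slice output[:, :, 0])
    F = ng.make_axis(length=input_size, name='F')
    REC = ng.make_axis(length=sequence_length, name='REC')
    N = ng.make_axis(length=batch_size, name='N')
    input_placeholder = ng.placeholder(ng.make_axes([F, REC, N]))

    # matching random numpy value of shape (input_size, sequence_length, batch_size)
    input_value = rng.uniform(-0.01, 0.01, input_placeholder.axes)
    return input_placeholder, input_value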
def test_learning_policy_schedule(transformer_factory, drop_factor):
    base_learning_rate = 1.0
    schedule = [20, 100, 300, 750, 1000]

    lr_params = {'name': 'schedule',
                 'base_lr': base_learning_rate,
                 'gamma': drop_factor,
                 'schedule': schedule}

    iteration = ng.placeholder((), dtype=np.dtype(np.uint32))
    lro = LearningRateOptimizer(learning_rate=lr_params, iteration=iteration)

    schedule.append(np.inf)
    np_schedule = np.array(schedule)

    with ExecutorFactory() as ex:
        scheduled_learning_rate = ex.transformer.computation(lro.lrate, iteration)

        for iter_input in np.random.randint(0, 1100, 5):
            baseline_value = scheduled_learning_rate(iter_input)

            max_step_ind = np.where(iter_input < np_schedule)[0][0]
            if isinstance(drop_factor, list):
                scale_factor = np.prod(drop_factor[:max_step_ind])
            else:
                scale_factor = drop_factor ** max_step_ind
            reference_value = base_learning_rate * scale_factor

            assert ng.testing.allclose(baseline_value, reference_value, rtol=1e-5)
def test_batchnorm_bprop(input_placeholder, bn_params, transformer_factory):
    layer = BatchNorm(**bn_params)
    fprop = layer(input_placeholder)

    # Derivatives to check
    bprop_vars = [input_placeholder, layer.gamma, layer.beta]

    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    with ExecutorFactory() as ex:
        # Create derivative executor
        bprop_function = ex.executor(bprops, input_placeholder, delta_placeholder)

        # Generate data
        x = rng.uniform(0, 1, input_placeholder.axes)
        delta = rng.uniform(-.1, .1, delta_placeholder.axes)

        # Compute reference bprop
        dx_ref, dgamma_ref, dbeta_ref = BatchNormReference(x, **bn_params).bprop(delta)

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(x, delta)

        assert ng.testing.allclose(dx, dx_ref, rtol=rtol, atol=atol)
        assert ng.testing.allclose(dgamma, dgamma_ref, rtol=rtol, atol=atol)
        assert ng.testing.allclose(dbeta, dbeta_ref, rtol=rtol, atol=atol)
def compare_optimizer_variable_select(opt_ng, opt_ref):
    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')
    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using the optimizer under test
    np_W1 = np.random.rand(C.length)
    np_W2 = np.random.rand(C.length)
    W1 = ng.variable([C], initial_value=np_W1)
    W2 = ng.variable([C], initial_value=np_W2)

    # Set up op graph
    cost = ng.sum(target - ng.dot(W1, data) - ng.dot(W2, data), out_axis=())
    updated_weights = ng.sequential([opt_ng(cost, variables=[W1]), W1])

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation([updated_weights, W2], data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            [ng_W1, ng_W2] = opt_ng_comp(x, y)  # updated weights for ngraph optimizer
            np_W1 = opt_ref(x, np_W1)           # updated weights for reference optimizer

            ng.testing.assert_allclose(np_W1, ng_W1, rtol=1e-3)
            ng.testing.assert_allclose(np_W2, ng_W2, rtol=1e-3)
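# data_generator is used by several optimizer tests but defined elsewhere. A
# plausible definition, generalizing the local data_generator inside test_gdm
# further down (which closes over the feature and batch lengths instead of
# taking them as arguments):
def data_generator(iteration_count, feature_len, batch_len):
    # yield (data, target) pairs shaped like the [C, N] / [N] placeholders above
    for _ in range(iteration_count):
        yield (np.random.rand(feature_len, batch_len).astype('float32'),
               np.random.rand(batch_len).astype('float32'))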
def test_conv_batchnorm_fprop(conv_input_placeholder, bn_params):
    """This checks that we are doing batch norm across multiple axes and
    properly tracking the side effect variables
    """
    layer = BatchNorm(**bn_params)
    fprop = layer(conv_input_placeholder)

    with ExecutorFactory() as ex:
        # Compute executors
        fprop_function = ex.executor(fprop, conv_input_placeholder)
        stats_function = ex.executor([ng.value_of(layer.gmean), ng.value_of(layer.gvar)])

        # Initial conditions for tracked variables
        bn_params['gmean'] = 0.0
        bn_params['gvar'] = 1.0
        bn_params['axis'] = (1, 2, 3, )

        # Test over 2 iterations to make sure values update properly
        for i in range(2):
            # Generate data
            x = rng.uniform(0, 1, conv_input_placeholder.axes)

            # Compute reference fprop and stats
            batch_norm_reference = BatchNormReference(x, **bn_params)
            out_ref, bn_params['gmean'], bn_params['gvar'] = batch_norm_reference.fprop

            # Compute ngraph fprop and stats
            out = fprop_function(x)
            gm, gv = stats_function()

            ng.testing.assert_allclose(out, out_ref, rtol=rtol, atol=atol)
            ng.testing.assert_allclose(gm, bn_params['gmean'], rtol=rtol, atol=atol)
            ng.testing.assert_allclose(gv, bn_params['gvar'], rtol=rtol, atol=atol)
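# BatchNormReference is not shown in this excerpt. The sketch below illustrates
# the fprop math it presumably implements, transcribed from the inline numpy
# reference in test_batchnorm_fprop further down; the class name and constructor
# signature are assumptions, and gamma/beta (which default to 1 and 0 in the
# layer) as well as the bprop used by other tests are omitted.
class BatchNormReferenceFpropSketch(object):

    def __init__(self, x, rho=0.9, eps=1e-3, gmean=0.0, gvar=1.0, axis=1):
        # normalize over the reduction axes with the layer's epsilon
        xmean = x.mean(axis=axis, keepdims=True)
        xvar = x.var(axis=axis, keepdims=True)
        out = (x - xmean) / np.sqrt(xvar + eps)

        # exponentially weighted running statistics, as tracked by gmean/gvar
        gmean = xmean.ravel() * (1.0 - rho) + gmean * rho
        gvar = xvar.ravel() * (1.0 - rho) + gvar * rho

        # tests unpack: out_ref, bn_params['gmean'], bn_params['gvar'] = ref.fprop
        self.fprop = (out, gmean, gvar)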
def test_batchnorm_bprop(input_placeholder, bn_params, transformer_factory):
    if input_placeholder._axes.lengths == (32, 32):
        pytest.config.flex_skip_now("Results mismatch - too strict tolerance (rtol, atol)")

    layer = BatchNorm(**bn_params)
    fprop = layer(input_placeholder)

    # Derivatives to check
    bprop_vars = [input_placeholder, layer.gamma, layer.beta]

    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    with ExecutorFactory() as ex:
        # Create derivative executor
        bprop_function = ex.executor(bprops, input_placeholder, delta_placeholder)

        # Generate data
        x = rng.uniform(0, 1, input_placeholder.axes)
        delta = rng.uniform(-.1, .1, delta_placeholder.axes)

        # Compute reference bprop
        dx_ref, dgamma_ref, dbeta_ref = BatchNormReference(x, **bn_params).bprop(delta)

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(x, delta)

        ng.testing.assert_allclose(dx, dx_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dgamma, dgamma_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dbeta, dbeta_ref, rtol=rtol, atol=atol)
def test_learning_policy_step(transformer_factory):
    base_learning_rate = 1.0
    drop_factor = 0.1
    step = 20

    lr_params = {'name': 'step',
                 'base_lr': base_learning_rate,
                 'gamma': drop_factor,
                 'step': step}

    iteration = ng.placeholder((), dtype=np.dtype(np.uint32))
    lro = LearningRateOptimizer(learning_rate=lr_params, iteration=iteration)

    with ExecutorFactory() as ex:
        stepped_learning_rate = ex.transformer.computation(lro.lrate, iteration)

        for iter_input in [10, 50, 90, 6, 15]:
            baseline_value = stepped_learning_rate(iter_input)
            reference_value = base_learning_rate * (drop_factor ** (iter_input // step))

            assert ng.testing.allclose(baseline_value, reference_value, rtol=1e-5)
def test_rnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                   return_sequence, weight_initializer, bias_initializer,
                   init_state, extra_axes, backward, transformer_factory):
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size,
                                                      extra_axes=extra_axes)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer, init_state)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size, return_sequence=return_sequence)
    rnn_ref.set_weights(W_in.reshape(rnn_ref.Wxh.shape), W_rec, b.reshape(rnn_ref.bh.shape))

    input_shape = (input_size, sequence_length, batch_size)
    h_ref_list = rnn_ref.fprop_only(input_value.reshape(input_shape).transpose([1, 0, 2]),
                                    init_states=init_state_value, backward=backward)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                       reset_cells=True, return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)
        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if return_sequence is True:
            fprop_neon = fprop_neon[:, :, 0]

        ng.testing.assert_allclose(fprop_neon, h_ref_list, rtol=fprop_rtol, atol=fprop_atol)
def test_birnn_deriv_numerical(sequence_length, input_size, hidden_size, batch_size,
                               return_sequence, weight_initializer, bias_initializer,
                               sum_out, concat_out):
    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                   reset_cells=True, return_sequence=return_sequence,
                   sum_out=sum_out, concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng.train_outputs(input_placeholder)

    w_in_f = rnn_ng.fwd_rnn.W_input
    w_rec_f = rnn_ng.fwd_rnn.W_recur
    b_f = rnn_ng.fwd_rnn.b
    w_in_b = rnn_ng.bwd_rnn.W_input
    w_rec_b = rnn_ng.bwd_rnn.W_recur
    b_b = rnn_ng.bwd_rnn.b

    params_f = [(w_in_f, W_in), (w_rec_f, W_rec), (b_f, b)]
    params_b = [(w_in_b, W_in), (w_rec_b, W_rec), (b_b, b)]

    if sum_out or concat_out:
        out_ng = [out_ng]
        params_birnn = [params_f + params_b]
    else:
        # in this case out_ng will be a list
        params_birnn = [params_f, params_b]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        dep_list = list()
        for output, dependents in zip(out_ng, params_birnn):
            for px, _ in dependents:
                update = (ex.derivative(output, px, input_placeholder),
                          ex.numeric_derivative(output, px, delta, input_placeholder))
                param_updates.append(update)
            dep_list += dependents

        for ii, ((deriv_s, deriv_n), (_, val)) in enumerate(zip(param_updates, dep_list)):
            ng.testing.assert_allclose(deriv_s(val, input_value),
                                       deriv_n(val, input_value),
                                       rtol=num_rtol, atol=num_atol)
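# ex.derivative / ex.numeric_derivative come from ExecutorFactory and are not
# defined in this excerpt. For intuition only, the sketch below shows the
# finite-difference idea that numeric_derivative presumably applies with step
# `delta`; it is a simplified illustration over a python callable, not the
# actual implementation, which differentiates an op graph.
def numeric_derivative_sketch(f, x, delta):
    # Jacobian of f at x: one central difference per element of x
    y_shape = f(x).shape
    dfdx = np.zeros(y_shape + x.shape)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        xp, xm = x.copy(), x.copy()
        xp[idx] += delta
        xm[idx] -= delta
        dfdx[(Ellipsis,) + idx] = (f(xp) - f(xm)) / (2 * delta)
        it.iternext()
    return dfdx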
def test_learning_policy_fixed_without_input():
    base_learning_rate = 0.1
    lro = LearningRateOptimizer(learning_rate=base_learning_rate)

    with ExecutorFactory() as ex:
        fixed_learning_rate = ex.transformer.computation(lro.lrate)
        baseline_value = fixed_learning_rate()

        ng.testing.assert_allclose(baseline_value, base_learning_rate, rtol=1e-6)
def test_rnn_deriv_numerical(sequence_length, input_size, hidden_size, batch_size,
                             return_sequence, weight_initializer, bias_initializer,
                             backward, init_state, transformer_factory):
    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer, init_state)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                       reset_cells=True, return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    params = [(rnn_ng.W_input, W_in),
              (rnn_ng.W_recur, W_rec),
              (rnn_ng.b, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            if init_state is not None:
                update = (ex.derivative(out_ng, px, input_placeholder, init_state),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder, init_state))
            else:
                update = (ex.derivative(out_ng, px, input_placeholder),
                          ex.numeric_derivative(out_ng, px, delta, input_placeholder))
            param_updates.append(update)

        for (deriv_s, deriv_n), (_, val) in zip(param_updates, params):
            if init_state is not None:
                ng.testing.assert_allclose(deriv_s(val, input_value, init_state_value),
                                           deriv_n(val, input_value, init_state_value),
                                           rtol=num_rtol, atol=num_atol)
            else:
                ng.testing.assert_allclose(deriv_s(val, input_value),
                                           deriv_n(val, input_value),
                                           rtol=num_rtol, atol=num_atol)
def test_rnn_deriv_ref(sequence_length, input_size, hidden_size, batch_size,
                       return_sequence, weight_initializer, bias_initializer,
                       transformer_factory):
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"
    assert return_sequence is True, "the reference rnn only supports sequences for deriv"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size, return_sequence=return_sequence)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.bh.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in, dW_rec, db = rnn_ref.lossFun(input_value.transpose([1, 0, 2]),
                                        deltas.copy().transpose([1, 0, 2]),
                                        init_states=init_state_value)[:3]

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                       reset_cells=True, return_sequence=return_sequence)

    # fprop ngraph RNN
    out_ng = rnn_ng.train_outputs(input_placeholder)

    deltas_constant = ng.constant(deltas, axes=out_ng.axes)
    params = [(rnn_ng.W_input, W_in),
              (rnn_ng.W_recur, W_rec),
              (rnn_ng.b, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            update = ng.deriv(out_ng, px, error=deltas_constant)
            param_updates.append(ex.executor(update, input_placeholder))

        for update_fun, ref_val in zip(param_updates, [dW_in, dW_rec, db]):
            ng.testing.assert_allclose(update_fun(input_value), ref_val.squeeze(),
                                       rtol=bprop_rtol, atol=bprop_atol)
def test_change_recurrent_axis_length(recurrent_layer_cls, batch_size,
                                      sequence_length, input_size, hidden_size):
    """
    Recurrent layer support for changing REC axis length
    (needed by seq2seq inference)
    """
    # create three identical recurrent layers with same weights
    W_input_val = np.random.normal(size=(hidden_size, input_size))
    W_recur_val = np.random.normal(size=(hidden_size, hidden_size))
    rec1 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec2 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec3 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())

    # create input placeholders and values
    # sequence length greater than 1
    N = ng.make_axis(length=batch_size, name='N')
    REC = ng.make_axis(length=sequence_length, name='REC')
    M = ng.make_axis(length=input_size, name='M')
    xn_axes = ng.make_axes([M, REC, N])
    xn = ng.placeholder(axes=xn_axes)
    xn_val = np.random.normal(size=(input_size, sequence_length, batch_size))

    # sequence length 1
    REC1 = ng.make_axis(length=1, name='REC')
    x1_axes = ng.make_axes([M, REC1, N])
    x1 = ng.placeholder(axes=x1_axes)
    x1_val = np.random.normal(size=(input_size, 1, batch_size))

    # check results of switching REC axis of a layer's input
    # computations switching REC axis
    y1_n = rec1(xn)
    y1_1 = rec1(x1)

    # check against not switching
    y2_n = rec2(xn)
    y3_1 = rec3(x1)

    with ExecutorFactory() as ex:
        y1_n_comp = ex.executor(y1_n, xn)
        y1_1_comp = ex.executor(y1_1, x1)
        y2_n_comp = ex.executor(y2_n, xn)
        y3_1_comp = ex.executor(y3_1, x1)

        ng.testing.assert_allclose(y1_n_comp(xn_val), y2_n_comp(xn_val))
        ng.testing.assert_allclose(y1_1_comp(x1_val), y3_1_comp(x1_val))
def test_learning_policy_fixed_with_input():
    base_learning_rate = 0.1
    iteration = ng.placeholder((), dtype=np.dtype(np.uint32))
    lro = LearningRateOptimizer(learning_rate=base_learning_rate, iteration=iteration)

    with ExecutorFactory() as ex:
        fixed_learning_rate = ex.transformer.computation(lro.lrate, iteration)

        for iter_input in [10, 50, 90, 6, 15]:
            baseline_value = fixed_learning_rate(iter_input)

            ng.testing.assert_allclose(baseline_value, base_learning_rate, rtol=1e-6)
def test_recurrent_batchnorm_fprop(RNN, recurrent_input, output_size, bn_params):
    """Compare fprop RNN with batch norm to numpy batch norm followed by rnn without"""

    helper = RNNHelper(recurrent_input, output_size, RNN, bn_params)

    # Get batch norm rnn graph
    fprop = helper.rnn(recurrent_input)

    # Get batch norm side effects
    stats = [ng.value_of(helper.gmean), ng.value_of(helper.gvar)]

    # Get reference graph
    reference_fprop = helper.reference_rnn(helper.reference_input)

    with ExecutorFactory() as ex:
        # Compute executors
        fprop_function = ex.executor(fprop, recurrent_input)
        stats_function = ex.executor(stats)
        reference_function = ex.executor(reference_fprop, helper.reference_input)

        # Initial conditions for tracked variables
        bn_params['gmean'] = 0.0
        bn_params['gvar'] = 1.0

        # Need to reduce over two positional axes in reference
        bn_params['axis'] = (1, 2)

        # Test over 2 iterations to make sure values update properly
        for _ in range(2):
            # Get network input values
            input_value = rng.uniform(-1, 1, recurrent_input.axes)

            # Compute reference values
            # First compute the weighted input
            weighted_input = np.dot(helper.W_in, input_value.swapaxes(0, 1))

            # Compute reference batch norm
            batch_norm_reference = BatchNormReference(weighted_input, **bn_params)
            normed_input, bn_params['gmean'], bn_params['gvar'] = batch_norm_reference.fprop

            # Finally, get reference RNN output
            ref = reference_function(normed_input)

            # Get ngraph batch norm RNN output
            out = fprop_function(input_value)
            gmean, gvar = stats_function()

            ng.testing.assert_allclose(out, ref, rtol=rtol, atol=recurrent_atol)
            ng.testing.assert_allclose(gmean, bn_params['gmean'],
                                       rtol=rtol, atol=recurrent_atol)
            ng.testing.assert_allclose(gvar, bn_params['gvar'],
                                       rtol=rtol, atol=recurrent_atol)
def test_broadcast_to(test_case):
    src_shape, dst_shape = test_case

    # numpy results
    x_np = np.array(np.random.rand(*src_shape))
    f_np = x_np + np.zeros(dst_shape)

    # ngraph results
    x_ng = ng.constant(x_np, axes=make_pos_axes(x_np.shape))
    f_ng = broadcast_to(x_ng, dst_shape)

    with ExecutorFactory() as ex:
        f_ng_comp = ex.transformer.computation(f_ng)
        f_ng_val = f_ng_comp()

        np.testing.assert_allclose(f_ng_val, f_np)
def test_rnn_deriv_numerical(sequence_length, input_size, hidden_size, batch_size,
                             return_sequence, weight_initializer, bias_initializer,
                             backward, init_state):
    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer, init_state)

    # Generate ngraph RNN
    rnn_ng = RNNCell(hidden_size, init=W_in, init_h2h=W_rec, activation=Tanh(),
                     reset_cells=True)

    # fprop ngraph RNN
    num_steps = input_placeholder.axes.recurrent_axis().length
    init_states = {'h': init_state} if init_state is not None else init_state
    out_ng = unroll(rnn_ng, num_steps, input_placeholder, init_states=init_states,
                    return_sequence=return_sequence)

    params = [(rnn_ng.i2h.linear.W, W_in),
              (rnn_ng.h2h.W, W_rec),
              # (rnn_ng.i2h.bias.W, b)
              ]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            if init_state is not None:
                update = (ex.derivative(out_ng, px, input_placeholder, init_state),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder, init_state))
            else:
                update = (ex.derivative(out_ng, px, input_placeholder),
                          ex.numeric_derivative(out_ng, px, delta, input_placeholder))
            param_updates.append(update)

        for (deriv_s, deriv_n), (_, val) in zip(param_updates, params):
            if init_state is not None:
                ng.testing.assert_allclose(deriv_s(val, input_value, init_state_value),
                                           deriv_n(val, input_value, init_state_value),
                                           rtol=num_rtol, atol=num_atol)
            else:
                ng.testing.assert_allclose(deriv_s(val, input_value),
                                           deriv_n(val, input_value),
                                           rtol=num_rtol, atol=num_atol)
def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov,
             transformer_factory):
    # Setup the baseline and reference optimizers to be tested
    gdm_args = {'learning_rate': random_learning_rate,
                'momentum_coef': random_momentum_coef,
                'wdecay': wdecay,
                'nesterov': nesterov}

    gdm_reference = GDMReference(**gdm_args)
    gdm = GradientDescentMomentum(**gdm_args)

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')
    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using GDM
    np_W = np.random.rand(C.length)
    W = ng.variable([C], initial_value=np_W)

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axis=())
    updated_weights = ng.sequential([gdm(cost), W])

    def data_generator(iteration_count):
        for i in range(iteration_count):
            yield (np.random.rand(C.length, N.length).astype('float32'),
                   np.random.rand(N.length).astype('float32'))

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        gdm_baseline = ex.transformer.computation(updated_weights, data, target)
        mock_dataset = data_generator(20)

        for x, y in mock_dataset:
            ng_W = gdm_baseline(x, y)      # updated weights for ngraph optimizer
            np_W = gdm_reference(x, np_W)  # updated weights for reference optimizer

            ng.testing.assert_allclose(np_W, ng_W, rtol=1e-3)
def ng_run(self,
           tf_target_node,
           tf_init_op=None,
           tf_feed_dict={},
           print_ng_result=False,
           verbose=False):
    """
    Run and get ngraph results

    Args:
        tf_target_node: target node in tf
        tf_feed_dict: feed_dict in tf
        print_ng_result: prints ng_result if set to True
        verbose: prints tf's node_def if set to True

    Returns:
        ng_result
    """
    # init importer, transformer
    importer = TFImporter()
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(self.graph_string)
    importer.import_graph_def(graph_def, verbose=verbose)

    # set target node
    ng_target_node = importer.get_op_handle_by_name(tf_target_node.name[:-2])

    # get targeting nodes for ng, convert tf's feed dict to list
    ng_feed_dict = {importer.get_op_handle_by_name(node.name[:-2]): val
                    for (node, val) in tf_feed_dict.items()}

    # evaluate ngraph
    with ExecutorFactory() as ex:
        ng_result_comp = ex.transformer.computation(ng_target_node,
                                                    *ng_feed_dict.keys())

        if tf_init_op:
            ex.transformer.computation(importer.get_op_handle(tf_init_op))()

        ng_result = ng_result_comp(feed_dict=ng_feed_dict)

    return ng_result
def test_batchnorm_fprop(batch_size, input_size, rho, epsilon, transformer_factory):
    # This checks that we are doing batch norm across a feature make_axis
    # and properly tracking the side effect variables
    np.random.seed(0)

    # set inputs
    N = ng.make_axis(batch_size, name='N')
    F = ng.make_axis(input_size)

    input_placeholder = ng.placeholder([F, N])
    layer = BatchNorm(rho, epsilon)
    fprop = layer.train_outputs(input_placeholder)

    with ExecutorFactory() as ex:
        fprop_function = ex.transformer.computation(fprop, input_placeholder)
        stats_function = ex.transformer.computation([ng.value_of(layer.gmean),
                                                     ng.value_of(layer.gvar)])

        # initial conditions for tracked variables
        gmean_ref, gvar_ref = 0.0, 1.0

        # create data
        for i in range(2):
            x = np.random.random((input_size, batch_size)).astype(np.float32)

            out = fprop_function(x)
            gm, gv = stats_function()

            xmean = x.mean(axis=1, keepdims=True)
            xvar = x.var(axis=1, keepdims=True)
            out_ref = (x - xmean) / np.sqrt(xvar + epsilon)
            gmean_ref = xmean.ravel() * (1.0 - rho) + gmean_ref * rho
            gvar_ref = xvar.ravel() * (1.0 - rho) + gvar_ref * rho

            assert ng.testing.allclose(out, out_ref,
                                       atol=1e-6), '%e' % np.max(np.abs(out - out_ref))
            assert ng.testing.allclose(gm, gmean_ref,
                                       atol=1e-6), '%e' % np.max(np.abs(gm - gmean_ref))
            assert ng.testing.allclose(gv, gvar_ref,
                                       atol=1e-6), '%e' % np.max(np.abs(gv - gvar_ref))
def test_weight_clipping(w_clip, optimizer):
    opt_ng = optimizer(0.1, weight_clip_value=w_clip)
    if isinstance(opt_ng, Adam):
        pytest.config.argon_skip_now("Argon Transformer error")  # TODO triage

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')
    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using the optimizer under test;
    # make sure initial values are higher than clip values
    np_W = 10 * w_clip * (2 * np.random.rand(C.length) - 1)
    W = ng.variable([C], initial_value=np_W)

    # double check generated initial W value
    assert np.max(np_W) > w_clip
    assert np.min(np_W) < -w_clip

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axis=())
    updated_weights = ng.sequential([opt_ng(cost), W])

    epsilon = w_clip * 1e-3

    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation(updated_weights, data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            ng_W = opt_ng_comp(x, y)  # updated weights for ngraph optimizer

            assert np.max(ng_W) < w_clip + epsilon
            assert np.min(ng_W) > -w_clip - epsilon
def test_recurrent_batchnorm_bprop(RNN, recurrent_input, output_size, bn_params,
                                   transformer_factory):
    """Compare bprop gated RNN with batch norm to numpy batch norm followed by rnn without"""

    helper = RNNHelper(recurrent_input, output_size, RNN, bn_params)

    # Get rnn + batch norm bprop graph
    fprop = helper.rnn(recurrent_input)
    bprop_vars = [recurrent_input, helper.gamma, helper.beta]

    # Get bprop graph
    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    # Get reference graphs
    reference_fprop = helper.reference_rnn(helper.reference_input)

    # Handle the case where we have gates in the RNN object
    bprop_vars = [helper.reference_input]
    if helper.has_gates:
        bprop_vars.append(helper.get_ancestor_op(reference_fprop))

    reference_delta_placeholder = ng.placeholder(reference_fprop.axes)
    reference_bprop = [ng.deriv(reference_fprop, var, reference_delta_placeholder)
                       for var in bprop_vars]

    # Begin execution
    with ExecutorFactory() as ex:
        bprop_function = ex.executor(bprops, recurrent_input, delta_placeholder)
        reference_function = ex.executor(reference_bprop, helper.reference_input,
                                         reference_delta_placeholder)

        # Create data
        input_value = rng.uniform(0, 1, recurrent_input.axes)
        delta = rng.uniform(-.1, .1, fprop.axes)

        # Compute reference weighted input
        weighted_input = np.dot(helper.W_in, input_value.swapaxes(0, 1))

        # Set the reduction axes used for reference
        bn_params['axis'] = (1, 2)

        # Get reference batch normed input
        batch_norm_reference = BatchNormReference(weighted_input, **bn_params)
        normed_input = batch_norm_reference.fprop[0]

        # Reference backprop through RNN
        reference_result = reference_function(normed_input, delta)
        # This is because of a HETR bug where return collections aren't handled properly
        if isinstance(reference_result, tuple):
            rnn_delta = reference_result[0]
        else:
            rnn_delta = reference_result

        # Reference backprop through BN
        dx_ref, dgamma_ref, dbeta_ref = batch_norm_reference.bprop(rnn_delta)

        # Backprop through reference batch norm for a single gate
        if helper.has_gates:
            rnn_gate_delta = reference_result[1]
            _, dgamma_ref, dbeta_ref = batch_norm_reference.bprop(rnn_gate_delta)

        # Backprop through weighted input
        dx_ref = np.dot(helper.W_in.T, dx_ref.swapaxes(0, 1))

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(input_value, delta)

        assert ng.testing.allclose(dx, dx_ref, rtol=rtol, atol=recurrent_atol)
        assert ng.testing.allclose(dgamma, dgamma_ref, rtol=rtol, atol=recurrent_atol)
        assert ng.testing.allclose(dbeta, dbeta_ref, rtol=rtol, atol=recurrent_atol)
def check_stacked_lstm(seq_len, input_size, hidden_size,
                       batch_size, init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(), reset_cells=reset_cells,
                         return_sequence=return_seq, backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(), reset_cells=reset_cells,
                         return_sequence=return_seq, backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []
        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives.
        # concatenate the i, f, o, g gate weights together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 = \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref,
                                                                             WLSTM_1,
                                                                             c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1,
                                                                             WLSTM_2,
                                                                             c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs a transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(),
                       gate_activation=Logistic(), reset_cells=reset_cells,
                       return_sequence=return_seq, backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives.
        # concatenate the i, f, o, g gate weights together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref, WLSTM,
                                                                     c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs a transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
def test_seq2seq_deriv_ref(batch_size, sequence_length_enc, sequence_length_dec,
                           input_size, hidden_size, weight_initializer, bias_initializer,
                           transformer_factory):

    # TODO: are these assumptions true?
    assert batch_size == 1, "the seq2seq reference implementation only supports batch size 1"

    # Get input placeholders and numpy arrays
    input_placeholder_enc, input_value_enc = \
        make_placeholder(input_size, sequence_length_enc, batch_size)
    input_placeholder_dec, input_value_dec = \
        make_placeholder(input_size, sequence_length_dec, batch_size)

    # Construct encoder weights
    W_in_enc, W_rec_enc, b_enc, _, _ = make_weights(input_placeholder_enc, hidden_size,
                                                    weight_initializer, bias_initializer,
                                                    init_state=False)

    # Construct decoder weights
    W_in_dec, W_rec_dec, b_dec, _, _ = make_weights(input_placeholder_dec, hidden_size,
                                                    weight_initializer, bias_initializer,
                                                    init_state=False)

    # Reference numpy seq2seq
    seq2seq_ref = RefSeq2Seq(input_size, hidden_size, decoder_return_sequence=True)
    seq2seq_ref.set_weights(W_in_enc, W_rec_enc, b_enc.reshape(seq2seq_ref.bh_enc.shape),
                            W_in_dec, W_rec_dec, b_dec.reshape(seq2seq_ref.bh_dec.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length_dec, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in_enc, dW_rec_enc, db_enc, dW_in_dec, dW_rec_dec, db_dec, encoding_ref, hs_return_dec = \
        seq2seq_ref.lossFun(input_value_enc.transpose([1, 0, 2]),
                            input_value_dec.transpose([1, 0, 2]),
                            deltas.copy().transpose([1, 0, 2]))

    # Generate ngraph Seq2Seq
    rnn_enc_ng = Recurrent(hidden_size, init=W_in_enc, init_inner=W_rec_enc,
                           activation=Tanh(), reset_cells=True, return_sequence=False)
    rnn_dec_ng = Recurrent(hidden_size, init=W_in_dec, init_inner=W_rec_dec,
                           activation=Tanh(), reset_cells=True, return_sequence=True)

    # ngraph fprop graph
    encoding_ng = rnn_enc_ng(input_placeholder_enc, init_state=None)
    output_ng = rnn_dec_ng(input_placeholder_dec, init_state=encoding_ng)

    deltas_constant = ng.constant(deltas, axes=output_ng.axes)
    params = [(rnn_dec_ng.b, db_dec),
              (rnn_dec_ng.W_input, dW_in_dec),
              (rnn_dec_ng.W_recur, dW_rec_dec),
              (rnn_enc_ng.b, db_enc),
              (rnn_enc_ng.W_input, dW_in_enc),
              (rnn_enc_ng.W_recur, dW_rec_enc)]

    with ExecutorFactory() as ex:
        # fprop computations
        fprop_fun = ex.executor([encoding_ng, output_ng],
                                input_placeholder_enc, input_placeholder_dec)

        # gradient computations
        update_funs = []
        for px, _ in params:
            update = ng.deriv(output_ng, px, error=deltas_constant)
            update_funs.append(ex.executor(update, input_placeholder_enc,
                                           input_placeholder_dec))

        # check forward pass
        encoding, output = fprop_fun(input_value_enc, input_value_dec)
        ng.testing.assert_allclose(encoding, encoding_ref)
        ng.testing.assert_allclose(np.squeeze(output), np.squeeze(hs_return_dec))

        # check gradient computations
        for update_fun, (_, deriv_ref_val) in zip(update_funs, params):
            grad_neon = update_fun(input_value_enc, input_value_dec)
            ng.testing.assert_allclose(grad_neon, deriv_ref_val.squeeze(),
                                       rtol=bprop_rtol, atol=1e-4)