def testGhostBNNegativeVirtualBatch(self):
    shape = [6, 5, 4, 3]
    inp = random_ops.random_uniform(shape, seed=1)

    with self.assertRaises(ValueError):
      normalization_layers.batch_normalization(
          inp, num_virtual_batches=-1)
def testGhostBNVirtualBatch1(self):
    shape = [6, 5, 4, 3]
    inp = random_ops.random_uniform(shape, seed=1)
    out1 = normalization_layers.batch_normalization(inp)
    out2 = normalization_layers.batch_normalization(
        inp, num_virtual_batches=1)

    self.assertListEqual(out1.shape.as_list(), out2.shape.as_list())

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      x = np.random.random(shape)
      y1, y2 = sess.run([out1, out2], feed_dict={inp: x})
      self.assertAllClose(y1, y2, atol=1e-5)
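# The tests above exercise "ghost" batch normalization via the
# `num_virtual_batches` argument. Below is a minimal NumPy sketch of the idea,
# under the assumption that each virtual batch is normalized with its own
# statistics over all non-channel axes (no learned gamma/beta here); it is an
# illustration, not the library implementation.
import numpy as np


def ghost_batch_norm(x, num_virtual_batches, eps=1e-3):
  # Split the real batch into equally sized virtual batches.
  assert x.shape[0] % num_virtual_batches == 0
  normed = []
  for v in np.split(x, num_virtual_batches, axis=0):
    # Per-virtual-batch mean/variance, reduced over all but the channel axis.
    axes = tuple(range(v.ndim - 1))
    mean = v.mean(axis=axes, keepdims=True)
    var = v.var(axis=axes, keepdims=True)
    normed.append((v - mean) / np.sqrt(var + eps))
  return np.concatenate(normed, axis=0)


# Shapes are preserved, and num_virtual_batches=1 reduces to ordinary
# per-batch normalization, mirroring testGhostBNVirtualBatch1.
out = ghost_batch_norm(np.random.rand(6, 5, 4, 3), num_virtual_batches=2)
assert out.shape == (6, 5, 4, 3)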
def dnn_logit_fn(features, mode):
    """Deep Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `Tensor`
        or `dict` of same.
      mode: Optional. Specifies if this is training, evaluation or prediction.
        See `ModeKeys`.

    Returns:
      A `Tensor` representing the logits, or a list of `Tensor`'s representing
      multiple logits in the MultiHead case.
    """
    is_training = mode == model_fn.ModeKeys.TRAIN
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and is_training:
          net = core_layers.dropout(net, rate=dropout, training=True)
        if batch_norm:
          # TODO(hjm): In future, if this becomes popular, we can enable
          # customization of the batch normalization params by accepting a
          # list of `BatchNormalization` instances as `batch_norm`.
          net = normalization.batch_normalization(
              net,
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              training=is_training,
              name='batchnorm_%d' % layer_id)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    return logits
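# dnn_logit_fn above calls batch_normalization with training=is_training. As a
# hedged reminder, here is a small sketch using the public tf.layers aliases of
# the same symbols (core_layers.dense -> tf.layers.dense, etc.): in a TF1-style
# graph the moving-mean/variance updates land in GraphKeys.UPDATE_OPS and must
# be run together with the train op. Shapes and names below are placeholders,
# not taken from the surrounding code.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10])
net = tf.layers.dense(x, 8, activation=tf.nn.relu)
net = tf.layers.batch_normalization(net, momentum=0.999, training=True)
loss = tf.reduce_mean(tf.square(net))

# Without this control dependency the moving statistics are never updated.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)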
def layer_with_recompute(inputs, is_recomputing=False):
    kwarg_values.append(is_recomputing)
    out = core_layers.dense(inputs, 2)
    out = normalization_layers.batch_normalization(out, training=True)
    if is_recomputing:
      # Ensure that the updates are not duplicated by popping off the latest
      # 2 additions.
      update_ops = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS)
      update_ops.pop()
      update_ops.pop()
    return out
def testGhostBNUnknownBatchSize(self):
    np_shape = [10, 5, 4]
    tf_shape = [None, 5, 4]
    inp = array_ops.placeholder(dtypes.float32, tf_shape)
    out = normalization_layers.batch_normalization(
        inp, num_virtual_batches=5)

    with self.test_session(use_gpu=True) as sess:
      sess.run(variables.global_variables_initializer())
      x = np.random.random(np_shape)
      y = sess.run(out, feed_dict={inp: x})

      self.assertListEqual(list(y.shape), np_shape)
def testBatchNormalizeLayerFp16(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
          a = array_ops.placeholder(np.float16, [4, 64, 64, 4],
                                    name="input_a")
          normed = layers_norm.batch_normalization(a, fused=False)

      report = ReportJSON(self, sess)

      sess.run(variables.global_variables_initializer())
      report.reset()

      result = sess.run(normed, {a: np.zeros([4, 64, 64, 4])})
      self.assertAllClose(result, np.zeros([4, 64, 64, 4]))

      report.parse_log()

      bl = ['*convert*/Cast*']
      report.assert_compute_sets_not_in_blacklist(bl)
      report.assert_tensor_input_names("input_a")
def testConvWithBnAndRelu(self):
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
      with variable_scope.variable_scope("vs", use_resource=True):
        y = layers.Conv2D(
            2, 1, use_bias=True,
            kernel_initializer=init_ops.ones_initializer())(x)
        y = layers_norm.batch_normalization(y, fused=True)
        y = nn_ops.relu(y)

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system(True, True, True)

    with tu.ipu_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(report)

      sess.run(y, {x: np.zeros([1, 4, 4, 2])})

      result = sess.run(report)
      # 2x compile, 1x upload, 1x load, 1x download, 1x execute
      self.assertEqual(len(result), 6)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      ok = [
          '__seed*',
          'host-exchange-local-copy',
          'Copy_',
          'vs/conv2d/Conv2D/convolution.*/Conv_1x1',
          'vs/conv2d/BiasAdd',
          'vs/batch_normalization/FusedBatchNorm/batch-norm-inference.*/',
          'vs/Relu/custom-call/Nonlinearity'
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
def test_reference_batch_normalization(self):
    """Check that batch norm from VBN agrees with opensource implementation."""
    random_seed.set_random_seed(1234)

    batch = random_ops.random_normal([6, 5, 7, 3, 3])

    for axis in range(5):
      # Get `layers` batchnorm result.
      bn_normalized = normalization.batch_normalization(
          batch, axis, training=True)

      # Get VBN's batch normalization on the reference batch.
      batch_axis = 0 if axis != 0 else 1  # axis and batch_axis can't be the same
      vbn = virtual_batchnorm.VBN(batch, axis, batch_axis=batch_axis)
      vbn_normalized = vbn.reference_batch_normalization()

      with self.cached_session(use_gpu=True) as sess:
        variables_lib.global_variables_initializer().run()

        bn_normalized_np, vbn_normalized_np = sess.run(
            [bn_normalized, vbn_normalized])
      self.assertAllClose(bn_normalized_np, vbn_normalized_np)
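# test_reference_batch_normalization sweeps `axis` over every dimension of a
# rank-5 tensor. A small NumPy sketch of the intended semantics (an
# illustration, not the library code): `axis` names the channel dimension that
# keeps its own statistics, and normalization reduces over every other axis.
import numpy as np


def batch_norm_over_axis(x, axis, eps=1e-3):
  reduce_axes = tuple(a for a in range(x.ndim) if a != axis)
  mean = x.mean(axis=reduce_axes, keepdims=True)
  var = x.var(axis=reduce_axes, keepdims=True)
  return (x - mean) / np.sqrt(var + eps)


batch = np.random.randn(6, 5, 7, 3, 3).astype(np.float32)
for axis in range(5):
  assert batch_norm_over_axis(batch, axis).shape == batch.shape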
def testBatchNormalizeLayerWithStableStatistics(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        with variable_scope.variable_scope("", use_resource=True):
          a = array_ops.placeholder(np.float32, [4, 64, 64, 4],
                                    name="input_a")
          normed = layers_norm.batch_normalization(a, training=True)

      ReportJSON(self, sess, use_stable_norm_statistics=True)

      sess.run(variables.global_variables_initializer())

      # Use a tensor with large mean to test the stability. This blows up with
      # the non-stable implementation (NaN output). Use a power-of-two that can
      # be represented exactly in float32 to make sure we work with an exact
      # mean internally.
      input_mean = 2.0**64
      inputs = input_mean * np.ones([4, 64, 64, 4])

      # y = gamma * (x - mean) / sqrt(variance + epsilon) + beta
      # Both (x - mean) and beta_initializer are zero, so this should be zero.
      result = sess.run(normed, {a: inputs})
      self.assertAllEqual(result, np.zeros([4, 64, 64, 4]))
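# The test above feeds a constant input with mean 2**64. A small NumPy
# illustration (not from the test file) of the failure mode it guards against:
# a one-pass variance of the form E[x^2] - E[x]^2 overflows float32 and gives
# NaN, while the two-pass (stable) form stays exactly zero. NumPy may emit
# overflow warnings when running this.
import numpy as np

x = np.full([4, 64, 64, 4], 2.0**64, dtype=np.float32)
mean = x.mean(dtype=np.float32)

naive_var = (x * x).mean(dtype=np.float32) - mean * mean  # inf - inf -> nan
stable_var = ((x - mean) ** 2).mean(dtype=np.float32)     # exactly 0.0

print(naive_var, stable_var)  # nan 0.0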
def test_same_as_batchnorm(self):
    """Check that batch norm over the full set X matches VBN with reference
    batch X \\ {y} applied to each held-out example `y`."""
    random_seed.set_random_seed(1234)

    num_examples = 4
    examples = [random_ops.random_normal([5, 7, 3])
                for _ in range(num_examples)]

    # Get the result of the opensource batch normalization.
    batch_normalized = normalization.batch_normalization(
        array_ops.stack(examples), training=True)

    for i in range(num_examples):
      examples_except_i = array_ops.stack(examples[:i] + examples[i + 1:])
      # Get the result of VBN's batch normalization.
      vbn = virtual_batchnorm.VBN(examples_except_i)
      vb_normed = array_ops.squeeze(
          vbn(array_ops.expand_dims(examples[i], [0])), [0])

      with self.cached_session(use_gpu=True) as sess:
        variables_lib.global_variables_initializer().run()
        bn_np, vb_np = sess.run([batch_normalized, vb_normed])
      self.assertAllClose(bn_np[i, ...], vb_np)
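# test_same_as_batchnorm relies on a piece of arithmetic worth spelling out:
# statistics over "reference batch + held-out example" equal statistics over
# the full stacked batch. A hedged NumPy sketch of that bookkeeping follows
# (sample counts and shapes chosen to match the test; this is not the VBN
# implementation itself).
import numpy as np

ref = np.random.randn(3, 5, 7, 3)      # reference batch: 3 examples
ex = np.random.randn(1, 5, 7, 3)       # the held-out example
eps = 1e-3

# Per-channel first and second moments, reduced over batch + spatial axes.
n_ref, n_ex = 3 * 5 * 7, 1 * 5 * 7
m_ref, sq_ref = ref.mean(axis=(0, 1, 2)), (ref ** 2).mean(axis=(0, 1, 2))
m_ex, sq_ex = ex.mean(axis=(0, 1, 2)), (ex ** 2).mean(axis=(0, 1, 2))

# Combine by sample count, then normalize the held-out example.
mean = (n_ref * m_ref + n_ex * m_ex) / (n_ref + n_ex)
var = (n_ref * sq_ref + n_ex * sq_ex) / (n_ref + n_ex) - mean ** 2
vbn_ex = (ex - mean) / np.sqrt(var + eps)

# Plain batch norm over the stacked set gives the same answer for that row.
full = np.concatenate([ref, ex], axis=0)
bn = (full - full.mean(axis=(0, 1, 2))) / np.sqrt(full.var(axis=(0, 1, 2)) + eps)
np.testing.assert_allclose(vbn_ex[0], bn[-1])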
def dnn_logit_fn(inputs, mode):
    is_training = mode == ModeKeys.TRAIN
    with variable_scope.variable_scope('input_from_feature_columns'):
      dnn_inputs = []
      for c in column_names:
        dnn_inputs.append(inputs[c])
      net = array_ops.concat(dnn_inputs, axis=1)
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id, values=(net,)) as hidden_layer_scope:
        net = core_layers.dense(
            net,
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope)
        if dropout is not None and is_training:
          net = core_layers.dropout(net, rate=dropout, training=True)
        if batch_norm:
          net = normalization.batch_normalization(
              net,
              momentum=0.999,
              training=is_training,
              name='batchnorm_%d' % layer_id)
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope('logits', values=(net,)) as logits_scope:
      logits = core_layers.dense(
          net,
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    return logits
def testBatchNormsMatchFwdBwd(self):
    with ops.device("/device:IPU:0"):
      x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
      with variable_scope.variable_scope("vs", use_resource=True):
        y = convolutional.conv2d(
            x, 2, 1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv1')
        y = layers_norm.batch_normalization(y, fused=True, training=True)
        y = convolutional.conv2d(
            y, 2, 1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv2')
        y = layers_norm.batch_normalization(y, fused=True, training=True)
        y = convolutional.conv2d(
            y, 2, 1,
            use_bias=False,
            kernel_initializer=init_ops.ones_initializer(),
            name='conv3')
        y = layers_norm.batch_normalization(y, fused=True, training=True)

      loss = math_ops.reduce_sum(y)
      optimizer = gradient_descent.GradientDescentOptimizer(0.1)
      train = optimizer.minimize(loss)

    with ops.device('cpu'):
      report = gen_ipu_ops.ipu_event_trace()

    tu.configure_ipu_system(True, True, True)

    with tu.ipu_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(report)

      sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

      result = sess.run(report)

      s = tu.extract_all_strings_from_event_trace(result)
      cs_list = tu.get_compute_sets_from_report(s)

      # One BN for forwards and one BN for grad
      # (note that we don't cache gradient application)
      ok = [
          '__seed*',
          'host-exchange-local-copy-',
          'Copy_',
          'vs/conv1/Conv2D/convolution.*/Conv_1x1',
          'vs/batch_normalization/FusedBatchNorm/batch-norm-training.*/',
          'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
          'gradients/vs/batch_normalization_2/FusedBatchNorm_grad/FusedBatchNormGrad/batch-norm-grad.*/',
          'GradientDescent/update_vs/batch_normalization/',
          'GradientDescent/update_vs/batch_normalization_1/',
          'GradientDescent/update_vs/batch_normalization_2/',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4/Convolve',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
      ]
      self.assertTrue(tu.check_all_compute_sets_and_list(cs_list, ok))
def testBatchNormsMatchFwdBwdSomeOnShard0SomeOnShard1(self):
    with self.session() as sess:
      with ops.device("/device:IPU:0"):
        x = array_ops.placeholder(np.float32, shape=[1, 4, 4, 2])
        with variable_scope.variable_scope("vs", use_resource=True):
          with ipu.scopes.ipu_shard(0):
            y = convolutional.conv2d(
                x, 2, 1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer(),
                name='conv1')
            y = layers_norm.batch_normalization(y, fused=True, training=True)
            y = convolutional.conv2d(
                y, 2, 1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer(),
                name='conv2')
            y = layers_norm.batch_normalization(y, fused=True, training=True)

          with ipu.scopes.ipu_shard(1):
            y = convolutional.conv2d(
                y, 2, 1,
                use_bias=False,
                kernel_initializer=init_ops.ones_initializer(),
                name='conv3')
            y = layers_norm.batch_normalization(y, fused=True, training=True)

        loss = math_ops.reduce_sum(y)
        optimizer = gradient_descent.GradientDescentOptimizer(0.1)
        train = optimizer.minimize(loss)

      report = tu.ReportJSON(self, sess, sharded=True)
      tu.move_variable_initialization_to_cpu()

      sess.run(variables.global_variables_initializer())
      report.reset()

      sess.run([train, loss], {x: np.zeros([1, 4, 4, 2])})

      report.parse_log()

      # Two BN for forwards (on shards 0 and 1) and two BN for grad
      # (note that we don't cache gradient application)
      # pylint: disable=line-too-long
      ok = [
          '__seed*',
          '*OnTileCopy*',
          'Copy_',
          'vs/conv1/Conv2D/convolution.*/Conv_1x1',
          'vs/conv3/Conv2D/convolution.*/Conv_1x1',
          'vs/batch_normalization/FusedBatchNorm*/batch-norm-training.*/',
          'vs/batch_normalization_2/FusedBatchNorm*/batch-norm-training.*/',
          'Sum/reduce.*/ReduceOnTile/InToIntermediateNoExchange/Reduce',
          'Sum/reduce.*/ReduceFinalStage/IntermediateToOutput/Reduce',
          'gradients/vs/batch_normalization_2/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
          'gradients/vs/batch_normalization_1/FusedBatchNorm*_grad/FusedBatchNormGrad*/batch-norm-grad.*/',
          'GradientDescent/update_vs/batch_normalization/',
          'GradientDescent/update_vs/batch_normalization_1/',
          'GradientDescent/update_vs/batch_normalization_2/',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
          'gradients/vs/conv3/Conv2D_grad/Conv2DBackpropInput/fusion/*Transpose',
          'gradients/vs/conv2/Conv2D_grad/Conv2DBackpropInput/fusion.*/*Transpose',
          'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Conv_4x4',
          'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/AddTo',
          'gradients/vs/conv1/Conv2D_grad/Conv2DBackpropFilter/fusion.*/Transpose',
      ]
      # pylint: enable=line-too-long
      report.assert_all_compute_sets_and_list(ok)
def build_generator(noise, caption, reuse=False):
    with tf.variable_scope("generator") as scope:
        if reuse:
            tf.get_variable_scope().reuse_variables()

        #
        t_noise = noise
        t_txt = caption

        t_txt = dense(inputs=t_txt, units=128, activation=my_leaky_relu)

        t_input = tf.concat(values=[t_noise, t_txt], axis=1)

        t0 = dense(inputs=t_input, units=128 * 8 * 4 * 4)
        t0 = batch_normalization(inputs=t0)
        t0 = tf.reshape(tensor=t0, shape=[-1, 4, 4, 128 * 8])

        t = conv2d(inputs=t0, filters=256, kernel_size=[1, 1], strides=[1, 1],
                   padding="valid", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=256, kernel_size=[3, 3], strides=[1, 1],
                   padding="same", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=128 * 8, kernel_size=[3, 3],
                   strides=[1, 1], padding="same")
        t = batch_normalization(inputs=t)

        t1 = tf.add(t0, t)

        t2 = conv2d_transpose(inputs=t1, filters=128 * 4, kernel_size=[3, 3],
                              strides=[2, 2], padding="same")
        t2 = batch_normalization(inputs=t2)

        t = conv2d(inputs=t2, filters=128, kernel_size=[1, 1], strides=[1, 1],
                   padding="valid", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=128, kernel_size=[3, 3], strides=[1, 1],
                   padding="same", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=128 * 4, kernel_size=[3, 3],
                   strides=[1, 1], padding="same")
        t = batch_normalization(inputs=t)

        t3 = tf.add(t2, t)

        t4 = conv2d_transpose(inputs=t3, filters=128 * 2, kernel_size=[3, 3],
                              strides=[2, 2], padding="same",
                              activation=my_leaky_relu)
        t4 = batch_normalization(inputs=t4)

        t5 = conv2d_transpose(inputs=t4, filters=128, kernel_size=[3, 3],
                              strides=[2, 2], padding="same",
                              activation=my_leaky_relu)
        t5 = batch_normalization(inputs=t5)

        t_output = conv2d_transpose(inputs=t5, filters=3, kernel_size=[3, 3],
                                    strides=[2, 2], padding="same",
                                    activation=tf.tanh)

        print("\nGenerator Output Shape: {}".format(t_output.shape))
        return t_output
def testGhostBNInputOutputShapesMatch(self):
    shape = [6, 4, 3]
    inp = random_ops.random_uniform(shape, seed=1)
    out = normalization_layers.batch_normalization(
        inp, num_virtual_batches=2)
    self.assertListEqual(out.shape.as_list(), shape)
def build_discriminator(image, caption, reuse=False):
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            tf.get_variable_scope().reuse_variables()

        #
        t0 = image
        t0 = conv2d(inputs=t0, filters=64, kernel_size=[4, 4], strides=[2, 2],
                    padding="same", activation=my_leaky_relu)

        t1 = conv2d(inputs=t0, filters=128, kernel_size=[4, 4], strides=[2, 2],
                    padding="same", activation=my_leaky_relu)
        t1 = batch_normalization(inputs=t1)

        t2 = conv2d(inputs=t1, filters=256, kernel_size=[4, 4], strides=[2, 2],
                    padding="same", activation=my_leaky_relu)
        t2 = batch_normalization(inputs=t2)

        t3 = conv2d(inputs=t2, filters=512, kernel_size=[4, 4], strides=[2, 2],
                    padding="same", activation=None)
        t3 = batch_normalization(inputs=t3)

        t = conv2d(inputs=t3, filters=128, kernel_size=[1, 1], strides=[1, 1],
                   padding="valid", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=128, kernel_size=[3, 3], strides=[1, 1],
                   padding="same", activation=my_leaky_relu)
        t = batch_normalization(inputs=t)
        t = conv2d(inputs=t, filters=512, kernel_size=[3, 3], strides=[1, 1],
                   padding="same")
        t = batch_normalization(inputs=t)

        t4 = tf.add(t3, t)

        #
        t_txt = dense(inputs=caption, units=128, activation=my_leaky_relu)
        t_txt = tf.expand_dims(input=t_txt, axis=1)
        t_txt = tf.expand_dims(input=t_txt, axis=1)
        t_txt = tf.tile(input=t_txt, multiples=[1, 4, 4, 1])

        t_concat = tf.concat(values=[t4, t_txt], axis=3)

        t4 = conv2d(inputs=t_concat, filters=512, kernel_size=[1, 1],
                    strides=[1, 1], padding="valid", activation=my_leaky_relu)
        t4 = batch_normalization(inputs=t4)

        t_output = conv2d(inputs=t4, filters=1, kernel_size=[4, 4],
                          strides=[4, 4], padding="valid")

        print("\nDiscriminator Output Shape: {}".format(t_output.shape))
        return t_output
def inference(self, x, training=True):
    logits = x
    # Pass `reuse` by keyword; the second positional argument of
    # tf.variable_scope is `default_name`, not `reuse`.
    with tf.variable_scope("mfcc", reuse=self.reuse):
        self.reuse = True
        kernel = 3
        filter_factor = 8

        x = batch_normalization(x)

        conv1_1 = dilated_conv1d_layer(x, kernel=kernel,
                                       num_filters=128 * filter_factor,
                                       stride=1, dilation=1,
                                       name="conv1_1", padding="SAME")
        conv1_2 = dilated_conv1d_layer(x, kernel=kernel,
                                       num_filters=64 * filter_factor,
                                       stride=1, dilation=2,
                                       name="conv1_2", padding="SAME")
        conv1_3 = dilated_conv1d_layer(x, kernel=kernel,
                                       num_filters=32 * filter_factor,
                                       stride=1, dilation=4,
                                       name="conv1_3", padding="SAME")
        conv1_4 = dilated_conv1d_layer(x, kernel=kernel,
                                       num_filters=16 * filter_factor,
                                       name="conv1_4", stride=1, dilation=8,
                                       padding="SAME")
        # conv1_5 = dilated_conv1d_layer(x, kernel=kernel, num_filters=8 * filter_factor, name="conv1_5", stride=1,
        #                                dilation=16, padding="SAME")
        # conv1_6 = dilated_conv1d_layer(x, kernel=kernel, num_filters=4 * filter_factor, name="conv1_6", stride=1,
        #                                dilation=32, padding="SAME")
        # conv1_7 = dilated_conv1d_layer(x, kernel=kernel, num_filters=2 * filter_factor, name="conv1_7", stride=1,
        #                                dilation=64, padding="SAME")
        # conv1_8 = dilated_conv1d_layer(x, kernel=kernel, num_filters=2 * filter_factor, name="conv1_8", stride=1,
        #                                dilation=128, padding="SAME")
        #
        # conv1_merge = tf.concat((conv1_1, conv1_2, conv1_3, conv1_4, conv1_5, conv1_6, conv1_7, conv1_8), axis=2)

        conv1_merge = tf.concat((conv1_1, conv1_2, conv1_3, conv1_4), axis=2)

        conv2_1 = dilated_conv1d_layer(conv1_1, kernel=kernel,
                                       num_filters=1024, name="conv2_1",
                                       stride=1, dilation=2, padding="SAME")
        conv3_1 = dilated_conv1d_layer(conv2_1, kernel=kernel,
                                       num_filters=1024, name="conv3_1",
                                       stride=1, dilation=2, padding="SAME")
        conv4_1 = dilated_conv1d_layer(conv3_1, kernel=kernel,
                                       num_filters=16, name="conv4_1",
                                       stride=1, dilation=2, padding="SAME")

        self.layers["x"] = x
        self.layers["conv1_1"] = conv1_1
        self.layers["conv1_2"] = tf.reshape(
            conv1_2,
            [-1, x.get_shape()[1].value, conv1_2.get_shape()[2].value],
            name=None)
        self.layers["conv1_3"] = tf.reshape(
            conv1_3,
            [-1, x.get_shape()[1].value, conv1_3.get_shape()[2].value],
            name=None)
        self.layers["conv1_4"] = tf.reshape(
            conv1_4,
            [-1, x.get_shape()[1].value, conv1_4.get_shape()[2].value],
            name=None)
        self.layers["conv_merge"] = tf.reshape(
            conv1_merge,
            [-1, x.get_shape()[1].value, conv1_merge.get_shape()[2].value],
            name=None)
        self.layers["conv3_1"] = conv3_1

        if training:
            pre_flatten = tf.reshape(
                conv4_1,
                [-1, x.get_shape()[1].value, conv4_1.get_shape()[2].value],
                name='pre_flatten')
            flatten = flatten1d(pre_flatten, name='flatten')
            # fc1 = fully_connected(flatten, units=512, name='fc1')
            # fc2 = fully_connected(fc1, units=256, name='fc2')
            logits = fully_connected(flatten, units=26, name='logits',
                                     activation=False)
    return logits