def deform_conv_2d(img, num_outputs, kernel_size=3, stride=2,
                   normalizer_fn=ly.batch_norm, activation_fn=lrelu, name=''):
    img_shape = img.shape.as_list()
    assert len(img_shape) == 4
    N, C, H, W = img_shape
    with tf.variable_scope('deform_conv' + '_' + name):
        # Predict a 2-D offset (dy, dx) for each of the kernel_size**2
        # sampling locations. The offset map must have the same spatial
        # size as the deformable conv output, so it shares its stride
        # (the original hard-coded stride=2, which breaks for stride != 2).
        offset = ly.conv2d(img,
                           num_outputs=2 * kernel_size**2,
                           kernel_size=3,
                           stride=stride,
                           activation_fn=None,
                           data_format='NCHW')
        kernel = tf.get_variable(
            name='d_kernel',
            shape=(num_outputs, C, kernel_size, kernel_size),
            initializer=tf.random_normal_initializer(0, 0.02))
        res = deform_conv_op(img,
                             filter=kernel,
                             offset=offset,
                             rates=[1, 1, 1, 1],
                             padding='SAME',
                             strides=[1, 1, stride, stride],
                             num_groups=1)
        if normalizer_fn is not None:
            res = normalizer_fn(res)
        if activation_fn is not None:
            res = activation_fn(res)
        return res
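# A minimal usage sketch, not taken from the original source: it assumes the
# module imports `tensorflow as tf` and `tensorflow.contrib.layers as ly`,
# that `deform_conv_op` is the custom CUDA kernel imported elsewhere, and
# that `lrelu` is the usual leaky ReLU (defined below, since the snippet
# above references it without defining it).
def lrelu(x, leak=0.2):
    # Leaky ReLU: identity for positive inputs, small slope for negatives.
    return tf.maximum(x, leak * x)

# NCHW input [N=4, C=3, H=64, W=64]; with the default stride of 2 the
# output is [4, num_outputs, 32, 32].
images = tf.random_normal([4, 3, 64, 64])
features = deform_conv_2d(images, num_outputs=32, kernel_size=3, stride=2,
                          name='block1')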
def run_benchmark():
    global parameters
    timing_entries = []
    with tf.Graph().as_default():
        # Generate some dummy images.
        image_size = 224
        kernel_col = 3
        kernel_row = 3
        kernel_size = kernel_col * kernel_row
        channel = 64
        # Note that our padding definition is slightly different from
        # cuda-convnet. In order to force the model to start with the same
        # activation sizes, we add 3 to the image_size and employ VALID
        # padding above.
        if FLAGS.data_format == 'NCHW':
            image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
        else:
            image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
        offset_shape = [
            FLAGS.batch_size, 2 * kernel_size, image_size + 3, image_size + 3
        ]
        kernel_shape = [channel, 3, kernel_col, kernel_row]
        images = tf.Variable(
            tf.random_normal(image_shape, dtype=tf.float32, stddev=1e-1))
        offset = tf.Variable(
            tf.random_normal(offset_shape, dtype=tf.float32, stddev=1e-1))
        kernel = tf.Variable(
            tf.random_normal(kernel_shape, dtype=tf.float32, stddev=1e-1))
        parameters = [kernel]
        last_layer = deform_conv_op.deform_conv_op(images,
                                                   kernel,
                                                   offset,
                                                   strides=[1, 1, 1, 1],
                                                   rates=[1, 1, 1, 1],
                                                   padding="SAME",
                                                   num_groups=1,
                                                   deformable_group=1)

        # Build an initialization operation.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session()
        sess.run(init)

        run_forward = True
        run_forward_backward = True
        if FLAGS.forward_only and FLAGS.forward_backward_only:
            raise ValueError("Cannot specify --forward_only and "
                             "--forward_backward_only at the same time.")
        if FLAGS.forward_only:
            run_forward_backward = False
        elif FLAGS.forward_backward_only:
            run_forward = False

        if run_forward:
            # Run the forward benchmark.
            timing_entries.append(
                time_tensorflow_run(sess, last_layer, "Forward"))

        if run_forward_backward:
            # Add a simple objective so we can calculate the backward pass.
            # objective = loss(last_layer, labels)
            loss = lambda x: tf.reduce_sum(x)
            objective = loss(last_layer)
            # Compute the gradient with respect to all the parameters.
            grad = tf.gradients(objective, parameters)
            # Run the backward benchmark.
            timing_entries.append(
                time_tensorflow_run(sess, grad, "Forward-backward"))
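# A minimal sketch of the command-line flags run_benchmark() reads; only the
# flag names come from the code above (FLAGS.batch_size, FLAGS.data_format,
# FLAGS.forward_only, FLAGS.forward_backward_only) — the defaults and help
# strings are assumptions.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 32, 'Batch size per step.')
tf.app.flags.DEFINE_string('data_format', 'NCHW',
                           "Data layout: 'NCHW' or 'NHWC'.")
tf.app.flags.DEFINE_boolean('forward_only', False,
                            'Benchmark only the forward pass.')
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
                            'Benchmark only the forward-backward pass.')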
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
import tensorflow as tf

import lib.deform_conv_op as deform_conv_op

# Load test data, generated by np.random.random((8, 6, 4, 5)).
with open("test.npz", 'rb') as f:
    arr = np.load(f)
# arr = np.zeros((8, 6, 4, 5))

with tf.Session() as sess:
    with tf.device('/gpu:0'):
        a = tf.constant(arr, dtype=tf.float32)
        b = tf.constant(np.ones((21, 2, 2, 2), dtype=np.float32))
        c = tf.constant(np.ones((8, 8, 2, 2), dtype=np.float32))
        result = deform_conv_op.deform_conv_op(a,
                                               b,
                                               c,
                                               strides=[1, 1, 2, 2],
                                               rates=[1, 1, 1, 1],
                                               padding="VALID",
                                               num_groups=3)
        sm = sess.run(result)
        d = tf.constant(np.ones((8, 21, 2, 2), dtype=np.float32))
        grad = tf.gradients(result, [a, b, c])
        res = [sess.run(g) for g in grad]
        print(res[0])
        # print(sm)
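# A sanity-check sketch, not part of the original test: with all offsets set
# to zero, a deformable convolution should reduce to a plain convolution, so
# its output can be compared against tf.nn.conv2d. The NCHW data layout and
# the (out, in, kh, kw) filter layout are inferred from the shapes used
# above; the op is assumed to require a GPU, as in the test.
x = np.random.random((2, 3, 8, 8)).astype(np.float32)
w = np.random.random((4, 3, 2, 2)).astype(np.float32)     # (out, in, kh, kw)
zero_offset = np.zeros((2, 2 * 2 * 2, 4, 4), np.float32)  # 2*kh*kw channels

with tf.Session() as sess:
    with tf.device('/gpu:0'):
        deform = deform_conv_op.deform_conv_op(
            tf.constant(x), tf.constant(w), tf.constant(zero_offset),
            strides=[1, 1, 2, 2], rates=[1, 1, 1, 1], padding="VALID",
            num_groups=1)
        # tf.nn.conv2d expects HWIO filters, so transpose (out, in, kh, kw)
        # to (kh, kw, in, out).
        plain = tf.nn.conv2d(tf.constant(x),
                             tf.constant(w.transpose(2, 3, 1, 0)),
                             strides=[1, 1, 2, 2], padding="VALID",
                             data_format='NCHW')
        d_out, p_out = sess.run([deform, plain])
        print(np.abs(d_out - p_out).max())  # expected to be ~0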