def testAtrousDepthwiseConv2DForward(self):
    """Compare rate-dilated depthwise conv with explicitly upsampled filters.

    Sweeps the input width (9 and 10), kernel height and width (1..3),
    dilation rate (1..3) and padding ("SAME", "VALID"). In every case the
    output of `nn_impl.depthwise_conv2d` called with `rate=[rate, rate]` must
    be close to the output of the same op called without `rate` on the filter
    returned by `_upsample_filters`.
    """
    unit_strides = [1, 1, 1, 1]
    in_height = 9
    kernel_extents = range(1, 4)
    with self.session():
        # Input layout: [batch, height, width, input_depth].
        for in_width in (9, 10):  # One odd and one even width.
            in_shape = [2, in_height, in_width, 2]
            inputs = np.arange(
                np.prod(in_shape), dtype=np.float32).reshape(in_shape)
            # Filter layout:
            # [kernel_height, kernel_width, input_depth, output_depth].
            for k_height in kernel_extents:
                for k_width in kernel_extents:
                    k_shape = [k_height, k_width, 2, 2]
                    kernel = np.arange(
                        np.prod(k_shape), dtype=np.float32).reshape(k_shape)
                    for dilation in range(1, 4):
                        kernel_up = _upsample_filters(kernel, dilation)
                        for pad_mode in ("SAME", "VALID"):
                            dilated = nn_impl.depthwise_conv2d(
                                inputs, kernel, unit_strides, pad_mode,
                                rate=[dilation, dilation])
                            upsampled = nn_impl.depthwise_conv2d(
                                inputs, kernel_up, unit_strides, pad_mode)
                            self.assertAllClose(
                                dilated, upsampled, rtol=1e-3, atol=1e-3)
def testDepthwiseConv2dGradWRTFilter(self):
    """Run `self.run_test` on a depthwise conv with respect to its filter.

    The input is a constant filled with 0.5 and the filter is a placeholder;
    `run_test` receives the filter placeholder and the convolution output.
    """
    image = constant_op.constant(
        [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
    kernel = array_ops.placeholder(
        dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
    # Unit strides with 'SAME' padding.
    conv_out = nn_impl.depthwise_conv2d(image, kernel, [1, 1, 1, 1], 'SAME')
    self.run_test(kernel, conv_out)
def testDepthwiseConv2DWithUnknownShape(self):
    """Run an NCHW depthwise conv on a placeholder with no static shape.

    Regression test for GitHub issue 22110. Does nothing when no GPU is
    available.
    """
    if test.is_gpu_available():
        with self.session(use_gpu=True):
            # The placeholder has no shape; the shape is only known at feed.
            unshaped_input = array_ops.placeholder(dtypes.float32)
            kernel = np.ones([1, 1, 1, 1], np.float32)
            conv = nn_impl.depthwise_conv2d(
                unshaped_input, kernel, [1, 1, 1, 1], "VALID",
                rate=[2, 1], data_format="NCHW")
            expected = np.ones([1, 1, 1, 1], np.float32)
            actual = conv.eval(
                feed_dict={unshaped_input: np.ones([1, 1, 1, 1], np.float32)})
            self.assertAllEqual(expected, actual)
def testDepthwiseConv2dGradWRTFilter(self):
    """Pass a depthwise conv output and its filter placeholder to `run_test`.

    The input tensor is a 0.5-filled constant of shape [1, 4, 4, 3]; the
    filter is a float32 placeholder of shape [2, 2, 3, 2].
    """
    conv_args = {
        'strides': [1, 1, 1, 1],
        'padding': 'SAME',
    }
    const_input = constant_op.constant(
        [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
    filter_ph = array_ops.placeholder(
        dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
    result = nn_impl.depthwise_conv2d(
        const_input, filter_ph, conv_args['strides'], conv_args['padding'])
    self.run_test(filter_ph, result)
def _VerifyValues(self, input_size, filter_size, stride, padding):
    """Check depthwise_conv2d under the test scope against the CPU device.

    Random float32 input and filter arrays are fed to the same convolution
    twice: once built inside `self.test_scope()` and once built inside
    `ops.device(self.CPU_DEVICE)`. The two results must be close.

    Args:
      input_size: Shape of the input array.
      filter_size: Shape of the filter array.
      stride: Stride applied to the two middle dimensions.
      padding: Padding type passed to the convolution.
    """
    image = np.random.rand(*input_size).astype(np.float32)
    kernel = np.random.rand(*filter_size).astype(np.float32)
    conv_strides = [1, stride, stride, 1]

    def _eval_conv(make_scope):
        # Builds fresh placeholders inside the scope returned by make_scope()
        # (created after the session is entered) and evaluates the conv.
        with self.test_session():
            with make_scope():
                image_ph = array_ops.placeholder(
                    dtypes.float32, shape=input_size)
                kernel_ph = array_ops.placeholder(
                    dtypes.float32, shape=filter_size)
                feeds = {image_ph: image, kernel_ph: kernel}
                conv = nn_impl.depthwise_conv2d(
                    image_ph, kernel_ph, conv_strides, padding)
                return conv.eval(feed_dict=feeds)

    xla_out = _eval_conv(self.test_scope)
    cpu_out = _eval_conv(lambda: ops.device(self.CPU_DEVICE))
    self.assertAllClose(xla_out, cpu_out)
def testAtrousDepthwiseConv2DForward(self):
    """Compare rate-dilated depthwise conv with upsampled filters (GPU session).

    Sweeps the input width (9 and 10), kernel height and width (1..3),
    dilation rate (1..3) and padding ("SAME", "VALID"). The evaluated output
    of `nn_impl.depthwise_conv2d` with `rate=[rate, rate]` must be close to
    the evaluated output of the op applied to the filter returned by
    `_upsample_filters`.
    """
    unit_strides = [1, 1, 1, 1]
    in_height = 9
    with self.session(use_gpu=True):
        # Input layout: [batch, height, width, input_depth].
        for in_width in (9, 10):  # One odd and one even width.
            in_shape = [2, in_height, in_width, 2]
            inputs = np.arange(
                np.prod(in_shape), dtype=np.float32).reshape(in_shape)
            # Filter layout:
            # [kernel_height, kernel_width, input_depth, output_depth].
            for k_height in range(1, 4):
                for k_width in range(1, 4):
                    k_shape = [k_height, k_width, 2, 2]
                    kernel = np.arange(
                        np.prod(k_shape), dtype=np.float32).reshape(k_shape)
                    for dilation in range(1, 4):
                        kernel_up = _upsample_filters(kernel, dilation)
                        for pad_mode in ("SAME", "VALID"):
                            dilated = nn_impl.depthwise_conv2d(
                                inputs, kernel, unit_strides, pad_mode,
                                rate=[dilation, dilation])
                            upsampled = nn_impl.depthwise_conv2d(
                                inputs, kernel_up, unit_strides, pad_mode)
                            self.assertAllClose(
                                dilated.eval(),
                                self.evaluate(upsampled),
                                rtol=1e-3,
                                atol=1e-3)
def call(self, inputs):
    """Apply the layer: depthwise convolution, channel regrouping, bias, activation.

    The input is convolved with `self.kernel` through
    `nn_impl.depthwise_conv2d`. The resulting channel axis is reshaped into
    `(lgroups * lfilters, group_input_dim)` and summed over the
    `group_input_dim` axis, so the output carries `lgroups * lfilters`
    channels in both data formats (the size the bias is reshaped to below).

    Args:
      inputs: Input tensor. When `self.rank == 1` a dimension is inserted at
        `self.exp_dim_pos` before the 2-D convolution and squeezed out again
        afterwards.

    Returns:
      The output tensor, passed through `self.activation` when it is set.
    """
    if self.rank == 1:
        inputs = array_ops.expand_dims(inputs, axis=self.exp_dim_pos)

    outputs = nn_impl.depthwise_conv2d(
        input=inputs,
        filter=self.kernel,
        strides=self._strides,
        padding=self.op_padding.upper(),
        rate=self.dilation_rate,
        data_format=self._data_format)

    # Regroup the output channels into (lgroups * lfilters, group_input_dim).
    conv_shape = outputs.get_shape().as_list()
    grouped_channels = self.lgroups * self.lfilters
    if self.data_format == 'channels_first':
        grouped_shape = [
            -1, grouped_channels, self.group_input_dim, *conv_shape[2:]
        ]
        outputs = array_ops.reshape(outputs, grouped_shape)
        # Sum over group_input_dim (axis 2), not over the grouped channels
        # (axis 1): the result must keep lgroups * lfilters channels to agree
        # with the channels_last branch and with the bias shape.
        outputs = math_ops.reduce_sum(outputs, axis=2, keepdims=False)
    else:
        grouped_shape = [
            -1, *conv_shape[1:-1], grouped_channels, self.group_input_dim
        ]
        outputs = array_ops.reshape(outputs, grouped_shape)
        outputs = math_ops.reduce_sum(outputs, axis=-1, keepdims=False)

    if self.rank == 1:
        outputs = array_ops.squeeze(outputs, axis=self.exp_dim_pos)

    if self.use_bias:
        if self.data_format == 'channels_first':
            if self.rank == 1:
                # nn.bias_add does not accept a 1D input tensor.
                bias = array_ops.reshape(
                    self.bias, (1, self.lfilters * self.lgroups, 1))
                outputs += bias
            elif self.rank == 2:
                outputs = nn.bias_add(outputs, self.bias, data_format='NCHW')
        else:
            outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def testConv2dBackpropFilterGrad(self):
    """Run `self.run_test` on both first-order gradients of a depthwise conv.

    The input is a placeholder and the filter a 0.5-filled constant. The
    gradient with respect to the input is passed to `run_test` together with
    the filter, then the gradient with respect to the filter together with
    the input.
    """
    image = array_ops.placeholder(
        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
    kernel = constant_op.constant(
        [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
    conv = nn_impl.depthwise_conv2d(image, kernel, [1, 1, 1, 1], 'SAME')

    # (differentiate with respect to, tensor handed to run_test), in order.
    for wrt, checked in ((image, kernel), (kernel, image)):
        grad = gradients_impl.gradients(conv, wrt)[0]
        self.run_test(checked, grad)
def testDepthwiseConv2dBackpropFilterGrad(self):
    """Run `self.run_test` on the input and filter gradients of a depthwise conv.

    First the gradient of the output with respect to the input placeholder is
    tested against the filter, then the gradient with respect to the constant
    filter is tested against the input.
    """
    input_ph = array_ops.placeholder(
        dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input')
    const_filter = constant_op.constant(
        [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter')
    unit_strides = [1, 1, 1, 1]
    conv_out = nn_impl.depthwise_conv2d(
        input_ph, const_filter, unit_strides, 'SAME')

    input_grad = gradients_impl.gradients(conv_out, input_ph)[0]
    self.run_test(const_filter, input_grad)

    filter_grad = gradients_impl.gradients(conv_out, const_filter)[0]
    self.run_test(input_ph, filter_grad)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                  use_gpu):
    """Verifies the output values of the convolution function.

    Builds an input and a filter filled with 1.0, 2.0, 3.0, ... and checks
    that `nn_ops.depthwise_conv2d_native` and `nn_impl.depthwise_conv2d`
    produce values within 1e-5 of each other and identically shaped results.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
    """
    total_size_1 = 1
    for s in tensor_in_sizes:
        total_size_1 *= s
    total_size_2 = 1
    for s in filter_in_sizes:
        total_size_2 *= s

    # Initializes the input and filter tensors with incrementing numbers
    # starting from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    strides = [1, stride, stride, 1]

    with self.test_session(use_gpu=use_gpu) as sess:
        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
        t1.set_shape(tensor_in_sizes)
        t2 = constant_op.constant(x2, shape=filter_in_sizes)
        conv_native = nn_ops.depthwise_conv2d_native(
            t1, t2, strides=strides, padding=padding)
        conv_gold = nn_impl.depthwise_conv2d(
            t1, t2, strides=strides, padding=padding)

        native_result = sess.run(conv_native)
        gold_result = sess.run(conv_gold)

    native_flat = np.ravel(native_result)
    gold_flat = np.ravel(gold_result)
    # Report the largest absolute deviation; the max of the signed difference
    # would hide errors where the native result is below the gold result.
    print("diff matrix:", np.amax(np.absolute(native_flat - gold_flat)))
    self.assertArrayNear(native_flat, gold_flat, 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_gold)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  data_type,
                  use_gpu,
                  grouped_conv=False,
                  data_format="NHWC"):
    """Checks `depthwise_conv2d_native` against `depthwise_conv2d`.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    num_inputs = 1
    for dim in tensor_in_sizes:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_in_sizes:
        num_filters *= dim

    # Input and filter values are k / size for k = 1 .. size.
    input_values = [k * 1.0 / num_inputs for k in range(1, num_inputs + 1)]
    filter_values = [k * 1.0 / num_filters for k in range(1, num_filters + 1)]

    tolerance_by_dtype = {
        dtypes.float16: 4e-2,
        dtypes.float32: 1e-8,
        dtypes.float64: 1e-13,
    }
    is_nchw = data_format == "NCHW"

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
        tolerance = tolerance_by_dtype[data_type]

        t1 = constant_op.constant(
            input_values, shape=tensor_in_sizes, dtype=data_type)
        t1.set_shape(tensor_in_sizes)
        t2 = constant_op.constant(
            filter_values, shape=filter_in_sizes, dtype=data_type)

        if is_nchw:
            # The native op gets the input transposed from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
            native_strides = [1, 1, stride, stride]
        else:
            native_t1 = t1
            native_strides = [1, stride, stride, 1]

        if grouped_conv:
            label_map = {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
        else:
            label_map = {}
        with sess.graph._kernel_label_map(label_map):
            conv_native = nn_ops.depthwise_conv2d_native(
                native_t1,
                t2,
                strides=native_strides,
                data_format=data_format,
                padding=padding)

        if is_nchw:
            # Bring the native result back to NHWC for comparison.
            conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

        try:
            native_result = sess.run(conv_native)
        except errors.InvalidArgumentError as e:
            # Grouped convolution kernel is only registered for cuDNN 7.
            # Silently return when we are running on an earlier version or
            # without GPU.
            if not e.message.startswith(
                    "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"
            ):
                raise e
            tf_logging.warn("Skipping grouped convolution test")
            return

        conv_interface = nn_impl.depthwise_conv2d(
            t1, t2, strides=[1, stride, stride, 1], padding=padding)
        interface_result = sess.run(conv_interface)

    max_diff = np.amax(np.absolute(native_result - interface_result))
    tf_logging.info(
        "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
        data_type, use_gpu, grouped_conv, max_diff)
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), tolerance)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  use_gpu,
                  data_format="NHWC"):
    """Checks `depthwise_conv2d_native` against `depthwise_conv2d`.

    The ops are built under a kernel label map that maps
    "DepthwiseConv2dNative" to "neon".

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    num_inputs = 1
    for dim in tensor_in_sizes:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_in_sizes:
        num_filters *= dim

    # Input and filter hold 1.0, 2.0, 3.0, ... in row-major order.
    input_values = [k * 1.0 for k in range(1, num_inputs + 1)]
    filter_values = [k * 1.0 for k in range(1, num_filters + 1)]
    is_nchw = data_format == "NCHW"

    with self.test_session(use_gpu=use_gpu) as sess:
        with sess.graph._kernel_label_map({"DepthwiseConv2dNative": "neon"}):
            t1 = constant_op.constant(input_values, shape=tensor_in_sizes)
            t1.set_shape(tensor_in_sizes)
            t2 = constant_op.constant(filter_values, shape=filter_in_sizes)

            if is_nchw:
                # The native op gets the input transposed from NHWC to NCHW,
                # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
                native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
                native_strides = [1, 1, stride, stride]
            else:
                native_t1 = t1
                native_strides = [1, stride, stride, 1]

            conv_native = nn_ops.depthwise_conv2d_native(
                native_t1,
                t2,
                strides=native_strides,
                data_format=data_format,
                padding=padding)
            if is_nchw:
                # Bring the native result back to NHWC for comparison.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            conv_interface = nn_impl.depthwise_conv2d(
                t1, t2, strides=[1, stride, stride, 1], padding=padding)

            native_result = sess.run(conv_native)
            interface_result = sess.run(conv_interface)

    max_diff = np.amax(np.absolute(native_result - interface_result))
    print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
          ", stride:", stride, ", padding: ", padding, ", max diff: ",
          max_diff)
    self.assertAllClose(
        np.ravel(native_result), np.ravel(interface_result), 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
def _ConstructAndTestGradient(self,
                              input_shape,
                              filter_shape,
                              output_shape,
                              stride,
                              padding,
                              data_type,
                              test_input,
                              use_gpu,
                              grouped_conv=False,
                              data_format="NHWC",
                              dilations=None):
    """Checks the gradient error of `depthwise_conv2d` against a tolerance.

    Args:
      input_shape: Shape of the input tensor; indexed as NHWC when
        `data_format` is "NCHW" and permuted accordingly.
      filter_shape: Shape of the filter tensor.
      output_shape: Expected output shape, permuted like `input_shape` when
        `data_format` is "NCHW".
      stride: Stride applied to both spatial dimensions.
      padding: Padding type.
      data_type: The data type to use; selects the tolerance.
      test_input: If true the gradient is checked with respect to the input,
        otherwise with respect to the filter.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to label the depthwise kernels with
        "cudnn_grouped_convolution".
      data_format: "NHWC" or "NCHW".
      dilations: Forwarded to `nn_impl.depthwise_conv2d`.
    """
    num_inputs = 1
    for dim in input_shape:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_shape:
        num_filters *= dim

    # Values are k / size for k = 0 .. size - 1.
    input_np = np.array(
        [k * 1.0 / num_inputs for k in range(0, num_inputs)]
    ).reshape(input_shape)
    filter_np = np.array(
        [k * 1.0 / num_filters for k in range(0, num_filters)]
    ).reshape(filter_shape)

    tolerance_by_dtype = {
        dtypes.float16: 4e-0,
        dtypes.float32: 8e-4,
        dtypes.float64: 1e-12,
    }

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
        tolerance = tolerance_by_dtype[data_type]

        input_tensor = constant_op.constant(
            input_np, shape=input_shape, dtype=data_type, name="input")
        filter_tensor = constant_op.constant(
            filter_np, shape=filter_shape, dtype=data_type, name="filter")

        if data_format == "NCHW":
            # Move everything from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            nchw_order = (0, 3, 1, 2)
            native_input = array_ops.transpose(input_tensor, [0, 3, 1, 2])
            input_shape = [input_shape[axis] for axis in nchw_order]
            output_shape = [output_shape[axis] for axis in nchw_order]
            strides = [1, 1, stride, stride]
        else:
            native_input = input_tensor
            strides = [1, stride, stride, 1]

        if grouped_conv:
            label_map = {
                "DepthwiseConv2dNative": "cudnn_grouped_convolution",
                "DepthwiseConv2dNativeBackpropInput":
                    "cudnn_grouped_convolution",
                "DepthwiseConv2dNativeBackpropFilter":
                    "cudnn_grouped_convolution",
            }
        else:
            label_map = {}
        with sess.graph._kernel_label_map(label_map):
            depthwise_conv2d = nn_impl.depthwise_conv2d(
                native_input,
                filter_tensor,
                strides,
                padding,
                data_format=data_format,
                dilations=dilations,
                name="depthwise_conv2d")

        self.assertEqual(output_shape, depthwise_conv2d.get_shape())

        # Pick the tensor (and its shape) the gradient is checked against.
        if test_input:
            wrt_tensor, wrt_shape = native_input, input_shape
        else:
            wrt_tensor, wrt_shape = filter_tensor, filter_shape

        try:
            err = gradient_checker.compute_gradient_error(
                wrt_tensor, wrt_shape, depthwise_conv2d, output_shape)
        except errors.InvalidArgumentError as e:
            # Grouped convolution kernel is only registered for cuDNN 7.
            # Silently return when we are running on an earlier version or
            # without GPU.
            if grouped_conv and e.message.startswith(
                    "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"):
                tf_logging.warn("Skipping grouped convolution test")
                return
            raise e

        tf_logging.info(
            "data_type: %r, use_gpu: %r, grouped_conv: %r, error = %f",
            data_type, use_gpu, grouped_conv, err)
        self.assertLess(err, tolerance)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  data_type,
                  use_gpu,
                  grouped_conv=False,
                  data_format="NHWC",
                  dilations=None):
    """Checks the depthwise convolution ops against a NumPy reference.

    The reference comes from `_DepthwiseConv2dNumpy` in NHWC layout.
    `nn_impl.depthwise_conv2d` is always compared with it;
    `nn_ops.depthwise_conv2d_native` is compared only when `dilations` is
    None.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
      dilations: A list of 2 elements, representing the dilations.
    """
    num_inputs = 1
    for dim in tensor_in_sizes:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_in_sizes:
        num_filters *= dim

    # Input and filter values are k / size for k = 1 .. size.
    x1 = np.array(
        [k * 1.0 / num_inputs for k in range(1, num_inputs + 1)]
    ).reshape(tensor_in_sizes)
    x2 = np.array(
        [k * 1.0 / num_filters for k in range(1, num_filters + 1)]
    ).reshape(filter_in_sizes)

    # Reference result, always computed in NHWC.
    strides = [1, stride, stride, 1]
    np_result = _DepthwiseConv2dNumpy(x1, x2, strides, padding, "NHWC",
                                      dilations)

    tolerance_by_dtype = {
        dtypes.float16: 4e-2,
        dtypes.float32: 1e-5,
        dtypes.float64: 1e-12,
    }
    is_nchw = data_format == "NCHW"
    compare_native = dilations is None

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
        tolerance = tolerance_by_dtype[data_type]

        t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=data_type)
        t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=data_type)

        if is_nchw:
            # Move the input from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            t1 = array_ops.transpose(t1, [0, 3, 1, 2])
            strides = [1, 1, stride, stride]

        # depthwise_conv2d_native does not support dilations except on TPUs.
        if compare_native:
            if grouped_conv:
                label_map = {
                    "DepthwiseConv2dNative": "cudnn_grouped_convolution"
                }
            else:
                label_map = {}
            with sess.graph._kernel_label_map(label_map):
                conv_native = nn_ops.depthwise_conv2d_native(
                    t1,
                    t2,
                    strides=strides,
                    data_format=data_format,
                    padding=padding)

            if is_nchw:
                # Bring the native result back to NHWC.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            try:
                # NumPy array produced by depthwise_conv2d_native.
                native_result = self.evaluate(conv_native)
            except errors.InvalidArgumentError as e:
                # Grouped convolution kernel is only registered for cuDNN 7.
                # Silently return when we are running on an earlier version
                # or without GPU.
                if not e.message.startswith(
                        "No OpKernel was registered to support Op "
                        "'DepthwiseConv2dNative'"):
                    raise e
                tf_logging.warn("Skipping grouped convolution test")
                return

        conv_interface = nn_impl.depthwise_conv2d(
            t1,
            t2,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilations=dilations)
        if is_nchw:
            # Bring the interface result back to NHWC.
            conv_interface = array_ops.transpose(conv_interface, [0, 2, 3, 1])

        # NumPy array produced by depthwise_conv2d.
        interface_result = self.evaluate(conv_interface)

    if compare_native:
        self.assertAllClose(native_result, np_result, atol=tolerance, rtol=0.)
    self.assertAllClose(interface_result, np_result, atol=tolerance, rtol=0.)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  data_type,
                  use_gpu,
                  grouped_conv=False,
                  data_format="NHWC"):
    """Compares `depthwise_conv2d_native` with `depthwise_conv2d`.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      grouped_conv: Whether to use cuDNN 7's grouped convolution.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    element_count = 1
    for extent in tensor_in_sizes:
        element_count *= extent
    weight_count = 1
    for extent in filter_in_sizes:
        weight_count *= extent

    # Both tensors are filled with k / size for k = 1 .. size.
    input_values = [
        k * 1.0 / element_count for k in range(1, element_count + 1)
    ]
    filter_values = [
        k * 1.0 / weight_count for k in range(1, weight_count + 1)
    ]
    nhwc_strides = [1, stride, stride, 1]
    channels_first = data_format == "NCHW"

    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with self.session(graph=graph, use_gpu=use_gpu) as sess:
        tolerance = {
            dtypes.float16: 4e-2,
            dtypes.float32: 1e-5,
            dtypes.float64: 1e-12,
        }[data_type]

        t1 = constant_op.constant(
            input_values, shape=tensor_in_sizes, dtype=data_type)
        t1.set_shape(tensor_in_sizes)
        t2 = constant_op.constant(
            filter_values, shape=filter_in_sizes, dtype=data_type)

        native_input = t1
        native_strides = nhwc_strides
        if channels_first:
            # The native op gets the input transposed from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            native_input = array_ops.transpose(t1, [0, 3, 1, 2])
            native_strides = [1, 1, stride, stride]

        label_map = {}
        if grouped_conv:
            label_map = {"DepthwiseConv2dNative": "cudnn_grouped_convolution"}
        with sess.graph._kernel_label_map(label_map):
            conv_native = nn_ops.depthwise_conv2d_native(
                native_input,
                t2,
                strides=native_strides,
                data_format=data_format,
                padding=padding)

        if channels_first:
            # Bring the native result back to NHWC for comparison.
            conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

        try:
            native_result = sess.run(conv_native)
        except errors.InvalidArgumentError as e:
            # Grouped convolution kernel is only registered for cuDNN 7.
            # Silently return when we are running on an earlier version or
            # without GPU.
            if not e.message.startswith(
                    "No OpKernel was registered to support Op 'DepthwiseConv2dNative'"):
                raise e
            tf_logging.warn("Skipping grouped convolution test")
            return

        conv_interface = nn_impl.depthwise_conv2d(
            t1, t2, strides=[1, stride, stride, 1], padding=padding)
        interface_result = sess.run(conv_interface)

    largest_gap = np.amax(np.absolute(native_result - interface_result))
    tf_logging.info(
        "data_type: %r, use_gpu: %r, grouped_conv: %r, max diff = %f",
        data_type, use_gpu, grouped_conv, largest_gap)
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), tolerance)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  use_gpu,
                  data_format="NHWC"):
    """Compares `depthwise_conv2d_native` with `depthwise_conv2d`.

    The ops are created while "DepthwiseConv2dNative" is mapped to the "neon"
    kernel label.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    element_count = 1
    for extent in tensor_in_sizes:
        element_count *= extent
    weight_count = 1
    for extent in filter_in_sizes:
        weight_count *= extent

    # Both tensors are filled with 1.0, 2.0, 3.0, ...
    input_values = [k * 1.0 for k in range(1, element_count + 1)]
    filter_values = [k * 1.0 for k in range(1, weight_count + 1)]
    channels_first = data_format == "NCHW"

    with self.test_session(use_gpu=use_gpu) as sess:
        with sess.graph._kernel_label_map({"DepthwiseConv2dNative": "neon"}):
            t1 = constant_op.constant(input_values, shape=tensor_in_sizes)
            t1.set_shape(tensor_in_sizes)
            t2 = constant_op.constant(filter_values, shape=filter_in_sizes)

            native_input = t1
            native_strides = [1, stride, stride, 1]
            if channels_first:
                # The native op gets the input transposed from NHWC to NCHW,
                # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
                native_input = array_ops.transpose(t1, [0, 3, 1, 2])
                native_strides = [1, 1, stride, stride]

            conv_native = nn_ops.depthwise_conv2d_native(
                native_input,
                t2,
                strides=native_strides,
                data_format=data_format,
                padding=padding)
            if channels_first:
                # Bring the native result back to NHWC for comparison.
                conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

            conv_interface = nn_impl.depthwise_conv2d(
                t1, t2, strides=[1, stride, stride, 1], padding=padding)

            native_result = sess.run(conv_native)
            interface_result = sess.run(conv_interface)

    largest_gap = np.amax(np.absolute(native_result - interface_result))
    print("depthwise conv_2d: ", tensor_in_sizes, "*", filter_in_sizes,
          ", stride:", stride, ", padding: ", padding, ", max diff: ",
          largest_gap)
    self.assertAllClose(
        np.ravel(native_result), np.ravel(interface_result), 1e-5)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)
def _VerifyValuesWithDilation(self,
                              tensor_in_sizes,
                              filter_in_sizes,
                              stride,
                              dilation,
                              padding,
                              data_type,
                              data_format="NHWC"):
    """Compares dilated `depthwise_conv2d` under the test scope with CPU.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      dilation: Dilation, applied to both spatial dimensions.
      padding: Padding type.
      data_type: The data type to use (np.float32 or np.float64).
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    num_inputs = 1
    for dim in tensor_in_sizes:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_in_sizes:
        num_filters *= dim

    # Input and filter hold 1.0, 2.0, 3.0, ... in row-major order.
    x1 = np.array(
        [k * 1.0 for k in range(1, num_inputs + 1)],
        dtype=data_type).reshape(tensor_in_sizes)
    x2 = np.array(
        [k * 1.0 for k in range(1, num_filters + 1)],
        dtype=data_type).reshape(filter_in_sizes)
    is_nchw = data_format == "NCHW"

    with self.session() as sess:
        if data_type == np.float32:
            # TODO(b/64210055): Tolerance for TPU is high.
            tolerance = 1e-2
        else:
            self.assertEqual(data_type, np.float64)
            tolerance = 1e-8

        t1 = array_ops.placeholder(shape=tensor_in_sizes, dtype=data_type)
        t2 = array_ops.placeholder(shape=filter_in_sizes, dtype=data_type)
        feeds = {t1: x1, t2: x2}
        dilations = [dilation, dilation]

        if is_nchw:
            # The scoped op gets the input transposed from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
            scoped_strides = [1, 1, stride, stride]
        else:
            native_t1 = t1
            scoped_strides = [1, stride, stride, 1]

        with self.test_scope():
            conv_native = nn_impl.depthwise_conv2d(
                native_t1,
                t2,
                strides=scoped_strides,
                rate=dilations,
                data_format=data_format,
                padding=padding)

        if is_nchw:
            # Bring the scoped result back to NHWC for comparison.
            conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

        with ops.device("CPU"):
            # CPU only support NHWC format
            cpu_strides = [1, stride, stride, 1]
            conv_interface = nn_impl.depthwise_conv2d(
                t1, t2, strides=cpu_strides, rate=dilations, padding=padding)

        native_result = sess.run(conv_native, feeds)
        interface_result = sess.run(conv_interface, feeds)

    print("data_type:", data_type, "max diff = ",
          np.amax(np.absolute(native_result - interface_result)))
    self.assertAllClose(
        np.ravel(native_result), np.ravel(interface_result), rtol=tolerance)
def _VerifyValues(self,
                  tensor_in_sizes,
                  filter_in_sizes,
                  stride,
                  padding,
                  data_type,
                  use_gpu,
                  data_format="NHWC"):
    """Compares `depthwise_conv2d_native` with `depthwise_conv2d`.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [filter_rows, filter_cols, input_depth, depth_multiplier].
      stride: Stride.
      padding: Padding type.
      data_type: The data type to use.
      use_gpu: Whether to use GPU.
      data_format: The data_format of the input. "NHWC" or "NCHW".
    """
    num_inputs = 1
    for dim in tensor_in_sizes:
        num_inputs *= dim
    num_filters = 1
    for dim in filter_in_sizes:
        num_filters *= dim

    # Input and filter hold 1.0, 2.0, 3.0, ... in row-major order.
    input_values = [k * 1.0 for k in range(1, num_inputs + 1)]
    filter_values = [k * 1.0 for k in range(1, num_filters + 1)]
    is_nchw = data_format == "NCHW"

    with self.test_session(use_gpu=use_gpu) as sess:
        # float16 and float32 share one tolerance; anything else must be
        # float64.
        if data_type == dtypes.float16 or data_type == dtypes.float32:
            tolerance = 1e-5
        else:
            self.assertEqual(data_type, dtypes.float64)
            tolerance = 1e-8

        t1 = constant_op.constant(
            input_values, shape=tensor_in_sizes, dtype=data_type)
        t1.set_shape(tensor_in_sizes)
        t2 = constant_op.constant(
            filter_values, shape=filter_in_sizes, dtype=data_type)

        if is_nchw:
            # The native op gets the input transposed from NHWC to NCHW,
            # e.g. [4, 5, 5, 48] becomes [4, 48, 5, 5].
            native_t1 = array_ops.transpose(t1, [0, 3, 1, 2])
            native_strides = [1, 1, stride, stride]
        else:
            native_t1 = t1
            native_strides = [1, stride, stride, 1]

        conv_native = nn_ops.depthwise_conv2d_native(
            native_t1,
            t2,
            strides=native_strides,
            data_format=data_format,
            padding=padding)
        if is_nchw:
            # Bring the native result back to NHWC for comparison.
            conv_native = array_ops.transpose(conv_native, [0, 2, 3, 1])

        conv_interface = nn_impl.depthwise_conv2d(
            t1, t2, strides=[1, stride, stride, 1], padding=padding)

        native_result = sess.run(conv_native)
        interface_result = sess.run(conv_interface)

    print("data_type:", data_type, "use_gpu:", use_gpu, "max diff = ",
          np.amax(np.absolute(native_result - interface_result)))
    self.assertArrayNear(
        np.ravel(native_result), np.ravel(interface_result), tolerance)
    self.assertShapeEqual(native_result, conv_native)
    self.assertShapeEqual(native_result, conv_interface)