def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                          padding, data_format, dtype, use_gpu):
  """Builds a conv3d op over deterministic inputs for one device and layout.

  The input and the filter are filled with the ramp 1/N, 2/N, ..., 1 (N being
  the element count of the respective tensor). The values are kept small to
  avoid overflowing float16 during the conv3d.

  Args:
    tensor_in_sizes: Sequence of ints; shape of the input constant.
    filter_in_sizes: Sequence of ints; shape of the filter constant.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: Padding argument forwarded to nn_ops.conv3d.
    data_format: Forwarded to nn_ops.conv3d. For "NCDHW" the input and the
      strides are converted with test_util.NHWCToNCHW and the result is
      converted back with test_util.NCHWToNHWC.
    dtype: dtype of both constants.
    use_gpu: Forwarded to self.cached_session.

  Returns:
    The (unevaluated) conv3d output tensor.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  total_size_tensor = 1
  for s in tensor_in_sizes:
    total_size_tensor *= s
  total_size_filter = 1
  for s in filter_in_sizes:
    total_size_filter *= s

  x1 = [f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)]
  x2 = [f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)]

  with self.cached_session(use_gpu=use_gpu):
    t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
    t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)

    if isinstance(stride, Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    if data_format == "NCDHW":
      t1 = test_util.NHWCToNCHW(t1)
      strides = test_util.NHWCToNCHW(strides)
    conv = nn_ops.conv3d(t1, t2, strides, padding=padding,
                         data_format=data_format)
    if data_format == "NCDHW":
      conv = test_util.NCHWToNHWC(conv)

    return conv
def testGradient(self):
  """Gradient-checks conv3d_backprop_filter_v2 for SAME/VALID and stride 1/2.

  For every (padding, stride) pair the random generator is re-seeded, a random
  input and a random out-backprop (both uniform in [-1, 1)) are created, and
  the gradient error reported by gradient_checker must stay below 1e-3.
  """
  input_dims = [2, 4, 3, 3, 2]
  kernel_dims = [3, 3, 3, 2, 3]
  max_err = 1e-3

  with self.test_session():
    for padding in ["SAME", "VALID"]:
      for stride in [1, 2]:
        np.random.seed(1)
        strides = [1, stride, stride, stride, 1]
        in_val = constant_op.constant(
            2 * np.random.random_sample(input_dims) - 1,
            dtype=dtypes.float32)

        # A throw-away convolution with a zero filter is the easiest way to
        # obtain the output shape for the current settings.
        probe = nn_ops.conv3d(in_val, array_ops.zeros(kernel_dims), strides,
                              padding)
        backprop_dims = probe.get_shape().as_list()
        out_backprop_val = constant_op.constant(
            2 * np.random.random_sample(backprop_dims) - 1,
            dtype=dtypes.float32)

        filter_grad = nn_ops.conv3d_backprop_filter_v2(
            in_val, kernel_dims, out_backprop_val, strides, padding)
        err = gradient_checker.compute_gradient_error(
            [in_val, out_backprop_val], [input_dims, backprop_dims],
            filter_grad, kernel_dims)
        print("conv3d_backprop_filter gradient err = %g " % err)
        self.assertLess(err, max_err)
def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                          strides, dilations, padding, data_format, use_gpu,
                          err, mode):
  """Compares conv3d gradients against nn_ops.convolution gradients.

  Args:
    input_sizes: Sequence of ints; shape of the input constant.
    filter_sizes: Sequence of ints; shape of the filter constant.
    output_sizes: Not used by this method.
    strides: List of three strides; wrapped as [1] + strides + [1] for conv3d.
    dilations: List of three dilation rates; wrapped like `strides`.
    padding: Padding argument forwarded to both convolutions.
    data_format: Forwarded to both convolutions; "NCDHW" triggers layout
      conversion through test_util.NHWCToNCHW / NCHWToNHWC.
    use_gpu: Forwarded to self.cached_session.
    err: Tolerance passed to assertArrayNear.
    mode: "input" differentiates with respect to the input, anything else
      with respect to the filter.
  """
  num_input = 1
  for dim in input_sizes:
    num_input *= dim
  num_filter = 1
  for dim in filter_sizes:
    num_filter *= dim

  # Both tensors hold the incrementing values 1.0, 2.0, ...
  input_values = [v * 1.0 for v in range(1, num_input + 1)]
  filter_values = [v * 1.0 for v in range(1, num_filter + 1)]

  has_dilation = (
      dilations[0] != 1 or dilations[1] != 1 or dilations[2] != 1)
  # If any dilation rate is larger than 1, only do the test on the GPU
  # because there is currently no CPU implementation for arbitrary
  # dilation rates.
  if has_dilation and not use_gpu:
    return

  channels_first = data_format == "NCDHW"
  with self.cached_session(use_gpu=use_gpu):
    if channels_first:
      input_sizes = test_util.NHWCToNCHW(input_sizes)
    t1 = constant_op.constant(input_values, shape=input_sizes)
    t2 = constant_op.constant(filter_values, shape=filter_sizes)

    full_strides = [1] + strides + [1]
    full_dilations = [1] + dilations + [1]
    if channels_first:
      full_strides = test_util.NHWCToNCHW(full_strides)
      full_dilations = test_util.NHWCToNCHW(full_dilations)

    actual = nn_ops.conv3d(
        t1,
        t2,
        strides=full_strides,
        dilations=full_dilations,
        padding=padding,
        data_format=data_format)
    expected = nn_ops.convolution(
        t1,
        t2,
        padding=padding,
        strides=strides,
        dilation_rate=dilations,
        data_format=data_format)
    if channels_first:
      actual = test_util.NCHWToNHWC(actual)
      expected = test_util.NCHWToNHWC(expected)

    wrt = t1 if mode == "input" else t2
    actual_grad = gradients_impl.gradients(actual, wrt)[0]
    expected_grad = gradients_impl.gradients(expected, wrt)[0]

    actual_value = self.evaluate(actual_grad)
    expected_value = self.evaluate(expected_grad)
    self.assertShapeEqual(actual_value, actual_grad)
    self.assertShapeEqual(expected_value, expected_grad)

  print("expected = ", expected_value)
  print("actual = ", actual_value)
  self.assertArrayNear(expected_value.flatten(), actual_value.flatten(), err)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                  expected):
  """Runs conv3d on incrementing inputs and compares it with `expected`.

  Args:
    tensor_in_sizes: Sequence of ints; shape of the input constant.
    filter_in_sizes: Sequence of ints; shape of the filter constant.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: Padding argument forwarded to nn_ops.conv3d.
    expected: Flat sequence of expected output values; compared against the
      flattened result with a tolerance of 1e-5.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  total_size_1 = 1
  for s in tensor_in_sizes:
    total_size_1 *= s
  total_size_2 = 1
  for s in filter_in_sizes:
    total_size_2 *= s

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  # Initializes the tensors with incrementing numbers starting from 1.
  x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
  x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]

  with self.test_session(use_gpu=True) as sess:
    t1 = constant_op.constant(x1, shape=tensor_in_sizes)
    t2 = constant_op.constant(x2, shape=filter_in_sizes)
    conv = nn_ops.conv3d(t1, t2, strides, padding=padding)
    value = sess.run(conv)

  print("expected = ", expected)
  print("actual = ", value)
  self.assertArrayNear(expected, value.flatten(), 1e-5)
def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                          padding, data_format, use_gpu):
  """Builds a conv3d op over incrementing inputs for one device and layout.

  Args:
    tensor_in_sizes: Sequence of ints; shape of the input constant.
    filter_in_sizes: Sequence of ints; shape of the filter constant.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: Padding argument forwarded to nn_ops.conv3d.
    data_format: Forwarded to nn_ops.conv3d. For "NCDHW" the input and the
      strides are converted with test_util.NHWCToNCHW and the result is
      converted back with test_util.NCHWToNHWC.
    use_gpu: Forwarded to self.test_session.

  Returns:
    The (unevaluated) conv3d output tensor.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  total_size_1 = 1
  for s in tensor_in_sizes:
    total_size_1 *= s
  total_size_2 = 1
  for s in filter_in_sizes:
    total_size_2 *= s

  # Initializes the tensors with incrementing numbers starting from 1.
  x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
  x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]

  with self.test_session(use_gpu=use_gpu):
    t1 = constant_op.constant(x1, shape=tensor_in_sizes)
    t2 = constant_op.constant(x2, shape=filter_in_sizes)

    if isinstance(stride, Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    if data_format == "NCDHW":
      t1 = test_util.NHWCToNCHW(t1)
      strides = test_util.NHWCToNCHW(strides)
    conv = nn_ops.conv3d(t1, t2, strides, padding=padding,
                         data_format=data_format)
    if data_format == "NCDHW":
      conv = test_util.NCHWToNHWC(conv)

    return conv
def _Conv3DBackpropInputGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "strides" and "padding" attributes
      and its inputs 1 and 2 are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: None for input 0, a
    conv3d_backprop_filter_v2 result for input 1 and a conv3d result for
    input 2.
  """
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")

  grad_wrt_filter = nn_ops.conv3d_backprop_filter_v2(
      grad,
      array_ops.shape(op.inputs[1]),
      op.inputs[2],
      strides=strides,
      padding=padding)
  grad_wrt_out_backprop = nn_ops.conv3d(
      grad, op.inputs[1], strides=strides, padding=padding)
  return [None, grad_wrt_filter, grad_wrt_out_backprop]
def _Conv3DBackpropFilterGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "strides" and "padding" attributes
      and its inputs 0 and 2 are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: a conv3d_backprop_input_v2 result for
    input 0, None for input 1 and a conv3d result for input 2.
  """
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")

  grad_wrt_input = nn_ops.conv3d_backprop_input_v2(
      array_ops.shape(op.inputs[0]),
      grad,
      op.inputs[2],
      strides=strides,
      padding=padding)
  grad_wrt_out_backprop = nn_ops.conv3d(
      op.inputs[0], grad, strides=strides, padding=padding)
  return [grad_wrt_input, None, grad_wrt_out_backprop]
def _Conv3DBackpropInputGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "data_format", "strides" and
      "padding" attributes and its inputs 1 and 2 are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: None for input 0, a
    conv3d_backprop_filter_v2 result for input 1 and a conv3d result for
    input 2.
  """
  data_format = op.get_attr("data_format")
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")

  grad_wrt_filter = nn_ops.conv3d_backprop_filter_v2(
      grad,
      array_ops.shape(op.inputs[1]),
      op.inputs[2],
      strides=strides,
      padding=padding,
      data_format=data_format)
  grad_wrt_out_backprop = nn_ops.conv3d(
      grad,
      op.inputs[1],
      strides=strides,
      padding=padding,
      data_format=data_format)
  return [None, grad_wrt_filter, grad_wrt_out_backprop]
def testForward(self):
  """Asserts that a dilated NCDHW conv3d forward pass is reproducible.

  Random input and filter ops are built with self._random_data_op and the
  resulting conv3d op is handed to self._assert_reproducible.
  """
  input_op = self._random_data_op(
      LayerShapeNCDHW(batch=2, channels=3, depth=5, height=7, width=6))
  kernel_op = self._random_data_op(
      FilterShape3D(
          depth=3, height=3, width=3, in_channels=3, out_channels=2))

  conv_op = nn_ops.conv3d(
      input_op,
      kernel_op,
      strides=[1, 1, 1, 1, 1],
      padding='VALID',
      data_format='NCDHW',
      dilations=[1, 1, 2, 2, 2])
  self._assert_reproducible(conv_op)
def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                 stride, dilation, padding, data_format,
                                 use_gpu):
  """Builds a dilated conv3d and its nn_ops.convolution reference.

  Args:
    tensor_in_sizes: Sequence of ints; shape of the input constant.
    filter_in_sizes: Sequence of ints; shape of the filter constant.
    stride: Either a single stride (repeated three times) or an iterable of
      strides.
    dilation: List of dilation rates; it is concatenated with list literals,
      so it must be a list.
    padding: Padding argument forwarded to both convolutions.
    data_format: Forwarded to both convolutions. For "NCDHW" the input is
      converted with test_util.NHWCToNCHW, strides/dilations get a [1, 1]
      prefix, and both outputs are converted back with test_util.NCHWToNHWC.
    use_gpu: Forwarded to self.cached_session.

  Returns:
    A tuple (expected, computed) of unevaluated tensors: the
    nn_ops.convolution result and the nn_ops.conv3d result.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  total_size_tensor = 1
  for s in tensor_in_sizes:
    total_size_tensor *= s
  total_size_filter = 1
  for s in filter_in_sizes:
    total_size_filter *= s

  # Initializes the tensors with incrementing numbers starting from 1.
  x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)]
  x2 = [f * 1.0 for f in range(1, total_size_filter + 1)]

  with self.cached_session(use_gpu=use_gpu):
    t1 = constant_op.constant(x1, shape=tensor_in_sizes)
    t2 = constant_op.constant(x2, shape=filter_in_sizes)

    if isinstance(stride, Iterable):
      strides = list(stride)
    else:
      strides = [stride, stride, stride]

    if data_format == "NCDHW":
      t1 = test_util.NHWCToNCHW(t1)
      full_strides = [1, 1] + strides
      full_dilation = [1, 1] + dilation
    else:
      full_strides = [1] + strides + [1]
      full_dilation = [1] + dilation + [1]

    expected = nn_ops.convolution(
        t1,
        t2,
        padding=padding,
        strides=strides,
        dilation_rate=dilation,
        data_format=data_format)
    computed = nn_ops.conv3d(
        t1,
        t2,
        strides=full_strides,
        dilations=full_dilation,
        padding=padding,
        data_format=data_format)
    if data_format == "NCDHW":
      expected = test_util.NCHWToNHWC(expected)
      computed = test_util.NCHWToNHWC(computed)

  return expected, computed
def _Conv3DBackpropFilterGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "data_format" (decoded to str),
      "dilations", "strides" and "padding" attributes and its inputs 0 and 2
      are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: a conv3d_backprop_input_v2 result for
    input 0, None for input 1 and a conv3d result for input 2.
  """
  data_format = op.get_attr("data_format").decode()
  # The same attribute values are forwarded to both gradient ops.
  conv_attrs = dict(
      dilations=op.get_attr("dilations"),
      strides=op.get_attr("strides"),
      padding=op.get_attr("padding"),
      data_format=data_format)

  grad_wrt_input = nn_ops.conv3d_backprop_input_v2(
      array_ops.shape(op.inputs[0]), grad, op.inputs[2], **conv_attrs)
  grad_wrt_out_backprop = nn_ops.conv3d(op.inputs[0], grad, **conv_attrs)
  return [grad_wrt_input, None, grad_wrt_out_backprop]
def _Conv3DBackpropFilterGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "data_format" (decoded to str),
      "dilations", "strides" and "padding" attributes and its inputs 0 and 2
      are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: a conv3d_backprop_input_v2 result for
    input 0, None for input 1 and a conv3d result for input 2.
  """
  data_format = op.get_attr("data_format").decode()
  dilations = op.get_attr("dilations")
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")

  input_grad = nn_ops.conv3d_backprop_input_v2(
      array_ops.shape(op.inputs[0]),
      grad,
      op.inputs[2],
      dilations=dilations,
      strides=strides,
      padding=padding,
      data_format=data_format)
  out_backprop_grad = nn_ops.conv3d(
      op.inputs[0],
      grad,
      dilations=dilations,
      strides=strides,
      padding=padding,
      data_format=data_format)
  return [input_grad, None, out_backprop_grad]
def _Conv3DBackpropInputGrad(op, grad):
  """Returns the gradients for the three inputs of `op`.

  Args:
    op: The op being differentiated; its "data_format" (decoded to str),
      "dilations", "strides" and "padding" attributes and its inputs 1 and 2
      are read.
    grad: Gradient with respect to the output of `op`.

  Returns:
    A list aligned with op.inputs: None for input 0, a
    conv3d_backprop_filter_v2 result for input 1 and a conv3d result for
    input 2.
  """
  data_format = op.get_attr("data_format").decode()
  dilations = op.get_attr("dilations")
  strides = op.get_attr("strides")
  padding = op.get_attr("padding")

  filter_grad = nn_ops.conv3d_backprop_filter_v2(
      grad,
      array_ops.shape(op.inputs[1]),
      op.inputs[2],
      dilations=dilations,
      strides=strides,
      padding=padding,
      data_format=data_format)
  out_backprop_grad = nn_ops.conv3d(
      grad,
      op.inputs[1],
      dilations=dilations,
      strides=strides,
      padding=padding,
      data_format=data_format)
  return [None, filter_grad, out_backprop_grad]
def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                          padding, data_format, dtype, use_gpu):
  """Builds a conv3d op over deterministic inputs for one device and layout.

  The input and the filter are filled with the ramp 1/N, 2/N, ..., 1 (N being
  the element count of the respective tensor). The values are kept small to
  avoid overflowing float16 during the conv3d.

  Args:
    tensor_in_sizes: Sequence of ints; shape of the input constant.
    filter_in_sizes: Sequence of ints; shape of the filter constant.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: Padding argument forwarded to nn_ops.conv3d.
    data_format: Forwarded to nn_ops.conv3d. For "NCDHW" the input and the
      strides are converted with test_util.NHWCToNCHW and the result is
      converted back with test_util.NCHWToNHWC.
    dtype: dtype of both constants.
    use_gpu: Forwarded to self.cached_session.

  Returns:
    The (unevaluated) conv3d output tensor.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  total_size_tensor = 1
  for s in tensor_in_sizes:
    total_size_tensor *= s
  total_size_filter = 1
  for s in filter_in_sizes:
    total_size_filter *= s

  x1 = [
      f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)
  ]
  x2 = [
      f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)
  ]

  with self.cached_session(use_gpu=use_gpu):
    t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
    t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)

    if isinstance(stride, Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    if data_format == "NCDHW":
      t1 = test_util.NHWCToNCHW(t1)
      strides = test_util.NHWCToNCHW(strides)
    conv = nn_ops.conv3d(t1, t2, strides, padding=padding,
                         data_format=data_format)
    if data_format == "NCDHW":
      conv = test_util.NCHWToNHWC(conv)

    return conv
def loop_body(x, y):  # pylint: disable=unused-argument
  """Performs one hole-filling iteration on the mask `x`.

  `x` is convolved with the enclosing scope's `filt` (conv2d when
  `total_dims` is 2, conv3d otherwise, unit strides, "SAME" padding).
  Positions where the response reaches `threshold` are marked as holes.

  Args:
    x: The mask `Tensor` to be filled.
    y: Unused; present so the function matches the loop signature.

  Returns:
    A list of two elements:
      1) `x` plus the detected holes.
      2) A `Tensor` shaped like `x` holding ones where the response is
         >= `threshold` and zeros elsewhere.
  """
  if total_dims == 2:
    convolve, unit_strides = nn_ops.conv2d, [1, 1, 1, 1]
  else:
    convolve, unit_strides = nn_ops.conv3d, [1, 1, 1, 1, 1]
  response = convolve(x, filt, strides=unit_strides, padding="SAME")

  holes = array_ops.where(response >= threshold,
                          array_ops.ones_like(x),
                          array_ops.zeros_like(x))
  filled = x + holes
  return [filled, holes]
def test3DConv8x8x8_WithBias(self):
  """Runs an 8x8x8, stride-4 conv3d plus bias_add on the IPU device.

  All-zero inputs must give an all-zero [1, 20, 20, 20, 4] result, and the
  compute sets extracted from the event trace are checked against the
  expected list of names.
  """
  input_dims = [1, 84, 84, 84, 2]
  weight_dims = [8, 8, 8, 2, 4]
  bias_dims = [4]

  with ops.device("/device:IPU:0"):
    inp = array_ops.placeholder(np.float32, input_dims, name="inp")
    wei = array_ops.placeholder(np.float32, weight_dims, name="wei")
    bia = array_ops.placeholder(np.float32, bias_dims, name="bia")
    conv = nn_ops.conv3d(inp, wei, strides=[1, 4, 4, 4, 1], padding="VALID")
    output = nn_ops.bias_add(conv, bia)

  with ops.device('cpu'):
    report = gen_ipu_ops.ipu_event_trace()

  tu.configure_ipu_system()

  with tu.ipu_session() as sess:
    # The trace is fetched once before the run; its result is discarded.
    sess.run(report)

    feeds = {
        inp: np.zeros(input_dims),
        wei: np.zeros(weight_dims),
        bia: np.zeros(bias_dims),
    }
    conv_result = sess.run(output, feeds)
    self.assertAllClose(conv_result, np.zeros([1, 20, 20, 20, 4]))

    trace = sess.run(report)
    trace_strings = tu.extract_all_strings_from_event_trace(trace)
    compute_sets = tu.get_compute_sets_from_report(trace_strings)

    expected_sets = [
        '__seed*',
        'host-exchange-local-copy-',
        'Copy_',
        'Conv3D/convolution.*/Conv_8x8x8_stride4x4x4',
        'BiasAdd/fusion/addToChannel',
    ]
    self.assertTrue(
        tu.check_all_compute_sets_and_list(compute_sets, expected_sets))
def test3Dimension(self):
  """Checks one output position of a 1x1x1 conv3d against known values.

  Input and filter are filled with the ramp 1/N, 2/N, ..., 1 (N being the
  element count of the respective tensor). The eight output channels at
  position [0, 1, 1, 1] are compared with precomputed numbers.
  """

  def ramp(shape):
    """Returns the values 1/N .. 1 for a tensor of the given shape."""
    count = 1
    for dim in shape:
      count *= dim
    return [i * 1.0 / count for i in range(1, count + 1)]

  with self.cached_session():
    input_shape = [8, 16, 16, 16, 8]
    a = constant_op.constant(
        ramp(input_shape), shape=input_shape, dtype=dtypes.float32)

    filter_shape = [1, 1, 1, 8, 8]
    f = constant_op.constant(
        ramp(filter_shape), shape=filter_shape, dtype=dtypes.float32)

    conv_t = nn_ops.conv3d(
        a, filter=f, strides=[1, 1, 1, 1, 1], padding="VALID")
    slice_t = array_ops.slice(conv_t, [0, 1, 1, 1, 0], [1, 1, 1, 1, 8])
    result = self.evaluate(slice_t)

    expected = [
        0.03028321, 0.03132677, 0.03237033, 0.03341389, 0.03445745,
        0.035501, 0.03654456, 0.03758812
    ]
    self.assertAllClose(expected, result.flatten(), rtol=1e-6)
def _ConstructAndTestGradientForConfig(
    self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
    padding, test_input, data_format, use_gpu):
  """Gradient-checks conv3d for one configuration.

  Args:
    batch: Batch size.
    input_shape: Three ints (planes, rows, cols) of the input volume.
    filter_shape: Three ints (planes, rows, cols) of the filter.
    in_depth: Number of input channels.
    out_depth: Number of output channels.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: "VALID" selects the VALID output-size formula; any other value
      uses ceil(input / stride). Also forwarded to nn_ops.conv3d.
    test_input: If true the gradient is checked with respect to the input,
      otherwise with respect to the filter.
    data_format: Forwarded to nn_ops.conv3d; "NCDHW" triggers layout
      conversion through test_util.NHWCToNCHW / NCHWToNHWC.
    use_gpu: Forwarded to self.test_session. float32 with tolerance 4e-3 is
      used when this is set and a GPU is available; otherwise float64 with
      tolerance 1e-8.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  input_planes, input_rows, input_cols = input_shape
  filter_planes, filter_rows, filter_cols = filter_shape

  input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
  filter_shape = [
      filter_planes, filter_rows, filter_cols, in_depth, out_depth
  ]

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  if padding == "VALID":
    output_planes = int(
        math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
    output_rows = int(
        math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
    output_cols = int(
        math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
  else:
    output_planes = int(math.ceil(float(input_planes) / strides[1]))
    output_rows = int(math.ceil(float(input_rows) / strides[2]))
    output_cols = int(math.ceil(float(input_cols) / strides[3]))
  output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

  input_size = 1
  for x in input_shape:
    input_size *= x
  filter_size = 1
  for x in filter_shape:
    filter_size *= x
  input_data = [x * 1.0 / input_size for x in range(0, input_size)]
  filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

  # A second GPU-availability check used to be nested in the first branch;
  # its else branch (tolerance 8e-3) could never execute and was removed.
  if test.is_gpu_available() and use_gpu:
    data_type = dtypes.float32
    tolerance = 4e-3
  else:
    data_type = dtypes.float64
    tolerance = 1e-8

  with self.test_session(use_gpu=use_gpu):
    orig_input_tensor = constant_op.constant(
        input_data, shape=input_shape, dtype=data_type, name="input")
    filter_tensor = constant_op.constant(
        filter_data, shape=filter_shape, dtype=data_type, name="filter")

    if data_format == "NCDHW":
      input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
      strides = test_util.NHWCToNCHW(strides)
    else:
      input_tensor = orig_input_tensor

    conv = nn_ops.conv3d(
        input_tensor,
        filter_tensor,
        strides,
        padding,
        data_format=data_format,
        name="conv")

    if data_format == "NCDHW":
      conv = test_util.NCHWToNHWC(conv)

    if test_input:
      err = gradient_checker.compute_gradient_error(orig_input_tensor,
                                                    input_shape, conv,
                                                    output_shape)
    else:
      err = gradient_checker.compute_gradient_error(filter_tensor,
                                                    filter_shape, conv,
                                                    output_shape)
  print("conv3d gradient error = ", err)
  self.assertLess(err, tolerance)
def _ConstructAndTestGradientForConfig(self, batch, input_shape, filter_shape,
                                       in_depth, out_depth, stride, padding,
                                       test_input, data_format, use_gpu):
  """Gradient-checks conv3d for one configuration.

  Args:
    batch: Batch size.
    input_shape: Three ints (planes, rows, cols) of the input volume.
    filter_shape: Three ints (planes, rows, cols) of the filter.
    in_depth: Number of input channels.
    out_depth: Number of output channels.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: "VALID" selects the VALID output-size formula; any other value
      uses ceil(input / stride). Also forwarded to nn_ops.conv3d.
    test_input: If true the gradient is checked with respect to the input,
      otherwise with respect to the filter.
    data_format: Forwarded to nn_ops.conv3d; "NCDHW" triggers layout
      conversion through test_util.NHWCToNCHW / NCHWToNHWC.
    use_gpu: Forwarded to self.test_session. float32 with tolerance 5e-3 is
      used when this is set and a GPU is available; otherwise float64 with
      tolerance 1e-8.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  input_planes, input_rows, input_cols = input_shape
  filter_planes, filter_rows, filter_cols = filter_shape

  input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
  filter_shape = [
      filter_planes, filter_rows, filter_cols, in_depth, out_depth
  ]

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  if padding == "VALID":
    output_planes = int(
        math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
    output_rows = int(
        math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
    output_cols = int(
        math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
  else:
    output_planes = int(math.ceil(float(input_planes) / strides[1]))
    output_rows = int(math.ceil(float(input_rows) / strides[2]))
    output_cols = int(math.ceil(float(input_cols) / strides[3]))
  output_shape = [
      batch, output_planes, output_rows, output_cols, out_depth
  ]

  input_size = 1
  for x in input_shape:
    input_size *= x
  filter_size = 1
  for x in filter_shape:
    filter_size *= x
  input_data = [x * 1.0 / input_size for x in range(0, input_size)]
  filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

  # TODO(mjanusz): Modify gradient_checker to also provide max relative
  # error and synchronize the tolerance levels between the tests for forward
  # and backward computations.
  # A second GPU-availability check used to be nested in the first branch;
  # its else branch (tolerance 8e-3) could never execute and was removed.
  if test.is_gpu_available() and use_gpu:
    data_type = dtypes.float32
    tolerance = 5e-3
  else:
    data_type = dtypes.float64
    tolerance = 1e-8

  with self.test_session(use_gpu=use_gpu):
    orig_input_tensor = constant_op.constant(input_data,
                                             shape=input_shape,
                                             dtype=data_type,
                                             name="input")
    filter_tensor = constant_op.constant(filter_data,
                                         shape=filter_shape,
                                         dtype=data_type,
                                         name="filter")

    if data_format == "NCDHW":
      input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
      strides = test_util.NHWCToNCHW(strides)
    else:
      input_tensor = orig_input_tensor

    conv = nn_ops.conv3d(input_tensor,
                         filter_tensor,
                         strides,
                         padding,
                         data_format=data_format,
                         name="conv")

    if data_format == "NCDHW":
      conv = test_util.NCHWToNHWC(conv)

    if test_input:
      err = gradient_checker.compute_gradient_error(
          orig_input_tensor, input_shape, conv, output_shape)
    else:
      err = gradient_checker.compute_gradient_error(
          filter_tensor, filter_shape, conv, output_shape)
  print("conv3d gradient error = ", err)
  self.assertLess(err, tolerance)
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1,
                 rho_filter=1, padding='SAME', dim=5, max_density=0.1,
                 num_trials=3, filter_type="K-RELU", test_type=""):
  """Compares three chained sparse convolutions with dense conv3d on the GPU.

  Random sparse data and three random sparse filters are created, converted
  with the sc_module conversion ops, pushed through three chained
  direct_sparse_conv_kd ops and compared element-wise with three chained
  nn_ops.conv3d ops (each followed by relu when filter_type is "K-RELU").
  Both pipelines are timed over `num_trials` runs after one warm-up run.

  Args:
    tensor_in_sizes: Shape of the input data tensor.
    filter_in_sizes: Shape of the first filter; must be a list because it is
      copied with a slice and modified to derive the later filter shapes.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    rho_data: First argument of sp.createRandomSparseTensor for the data.
    rho_filter: First argument of sp.createRandomSparseTensor for filters.
    padding: Padding forwarded to the sparse and dense convolutions.
    dim: Forwarded to direct_sparse_conv_kd.
    max_density: Forwarded to direct_sparse_conv_kd.
    num_trials: Number of timed runs per pipeline.
    filter_type: Forwarded to direct_sparse_conv_kd; "K-RELU" additionally
      enables relu in the dense reference.
    test_type: Unused.

  Returns:
    1 if any entry differs from the dense result by more than 1e-3,
    otherwise 0.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)

  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(
      rho_data, tensor_in_sizes, -3, 3)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(
      rho_filter, filter_in_sizes, -3, 3)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

  # The second and third filters take the first filter's output channel
  # count as their input channel count.
  filter_in_sizes2 = filter_in_sizes[:]
  filter_in_sizes2[-2] = filter_in_sizes2[-1]

  [t3ind, t3val, t3sh] = sp.createRandomSparseTensor(
      rho_filter, filter_in_sizes2, -3, 3)
  d3 = sp.sparse_to_dense(t3ind, t3val, t3sh)

  [t4ind, t4val, t4sh] = sp.createRandomSparseTensor(
      rho_filter, filter_in_sizes2, -3, 3)
  d4 = sp.sparse_to_dense(t4ind, t4val, t4sh)

  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  print("filter shape", filter_in_sizes)

  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.4

  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    convf = sc_module.direct_sparse_filter_conversion(
        t2ind, t2val, t2sh, t1sh)
    convf2 = sc_module.direct_sparse_filter_conversion(
        t3ind, t3val, t3sh, t3sh)
    convf3 = sc_module.direct_sparse_filter_conversion(
        t4ind, t4val, t4sh, t4sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
    pf = sess.run(convf)
    pf2 = sess.run(convf2)
    pf3 = sess.run(convf3)
  tf.reset_default_graph()

  with tf.device("/gpu:0"):
    net = sc_module.direct_sparse_conv_kd(
        pd.out_indices, pd.out_values, pd.out_shape,
        pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
        pf.out_shape, pf.out_channel_mapping, bias, strides, padding, dim,
        max_density, filter_type)
    net = sc_module.direct_sparse_conv_kd(
        net.out_indices, net.out_values, net.out_shape,
        net.out_block_channel_mapping, pf2.out_indices, pf2.out_values,
        pf2.out_shape, pf2.out_channel_mapping, bias, strides, padding, dim,
        max_density, filter_type)
    net = sc_module.direct_sparse_conv_kd(
        net.out_indices, net.out_values, net.out_shape,
        net.out_block_channel_mapping, pf3.out_indices, pf3.out_values,
        pf3.out_shape, pf3.out_channel_mapping, bias, strides, padding, dim,
        max_density, filter_type)
  with tf.Session(config=config) as sess:
    # The first run yields the result; only the following runs are timed.
    sv3 = sess.run(net)
    t5 = time.time()
    for _ in range(num_trials):
      sess.run(net)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()

  with tf.device("/gpu:0"):
    net = nn_ops.conv3d(d1, d2, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
    net = nn_ops.conv3d(net, d3, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
    net = nn_ops.conv3d(net, d4, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
  with tf.Session(config=config) as sess:
    expected = sess.run(net)
    t11 = time.time()
    for _ in range(num_trials):
      sess.run(net)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time dense gpu: ", td)
  tf.reset_default_graph()

  value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                sv3.out_shape,
                                sv3.out_block_channel_mapping[-1])

  approx_cmp = expected.flatten()
  approx = value3.flatten()
  print("entry count: ", len(approx_cmp))

  error_cnt = 0
  first_error = 0
  correct_cnt = 0
  for i, want in enumerate(approx_cmp):
    got = approx[i]
    if abs(want - got) > 1e-3:
      if error_cnt == 0:
        first_error = i
      error_cnt += 1
    elif got != 0:
      correct_cnt += 1

  print("total number of non-zero corrects: ", correct_cnt)
  print("sparse input size: ", len(t1ind))
  if error_cnt:
    print("total number of errors: ", error_cnt)
    print("first error: ", first_error)
    return 1
  print("OK")
  return 0
def convolve_inputs(inputs, batch_size, height, width, channels, filters):
  """Applies a 1x1x1 conv3d with bias and reshapes the result.

  Args:
    inputs: Tensor passed to conv3d.
    batch_size: Leading dimension of the reshaped output.
    height: Used together with `width` and `filters` for the last output
      dimension.
    width: See `height`.
    channels: Input channel count of the 'Weights' variable.
    filters: Output channel count of 'Weights' and length of 'Biases'.

  Returns:
    The biased convolution reshaped to
    [batch_size, -1, height * width * filters].
  """
  kernel = get_variable('Weights', [1, 1, 1, channels, filters])
  offset = get_variable('Biases', [filters],
                        initializer=constant_initializer(0.0))
  unit_strides = [1, 1, 1, 1, 1]
  projected = conv3d(inputs, kernel, unit_strides, 'SAME') + offset
  flat_features = height * width * filters
  return reshape(projected, [batch_size, -1, flat_features])
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1,
                 rho_filter=1, padding='SAME', dim=5, max_density=0.1,
                 num_trials=3, filter_type='K-RELU', test_type='',
                 dense=True):
  """Times one sparse convolution and, optionally, the dense conv3d.

  Random sparse data and a random sparse filter are created, converted with
  the sc_module conversion ops and run through direct_sparse_conv_kd. When
  `dense` is true the same data is run through nn_ops.conv3d. Each pipeline
  is timed over `num_trials` runs after one warm-up run.

  Args:
    tensor_in_sizes: Shape of the input data tensor.
    filter_in_sizes: Shape of the filter.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    rho_data: First argument of sp.createRandomSparseTensor for the data.
    rho_filter: First argument of sp.createRandomSparseTensor for the filter.
    padding: Padding forwarded to the sparse and dense convolutions.
    dim: Forwarded to direct_sparse_conv_kd.
    max_density: Scales the output entry count and is forwarded to
      direct_sparse_conv_kd.
    num_trials: Number of timed runs per pipeline.
    filter_type: Forwarded to direct_sparse_conv_kd.
    test_type: Unused.
    dense: Whether to run and time the dense reference.

  Returns:
    [expected, sv3, ts, td] when `dense` is true: the dense result, the
    sparse result, and the average sparse and dense run times in seconds.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  # Output entry budget: input shape with the filter's output channel count,
  # scaled by the maximum density.
  out_sizes = np.copy(tensor_in_sizes)
  out_sizes[-1] = filter_in_sizes[-1]
  out_entry_count = np.prod(out_sizes) * max_density

  bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)

  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(
      rho_data, tensor_in_sizes, -3, 3)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(
      rho_filter, filter_in_sizes)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  print("filter shape", filter_in_sizes)

  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7

  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    convf = sc_module.direct_sparse_filter_conversion(
        t2ind, t2val, t2sh, t1sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
    pf = sess.run(convf)
  tf.reset_default_graph()

  with tf.device("/gpu:0"):
    approx_scskconv = sc_module.direct_sparse_conv_kd(
        pd.out_indices, pd.out_values, pd.out_shape,
        pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
        pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
        out_entry_count, dim, max_density, filter_type)
  with tf.Session(config=config) as sess:
    # The first run yields the result; only the following runs are timed.
    sv3 = sess.run(approx_scskconv)
    t5 = time.time()
    for _ in range(num_trials):
      sess.run(approx_scskconv)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()

  time.sleep(1)

  # NOTE(review): only the dense=True path is visible in this function; with
  # dense=False nothing below runs and the function returns None. Confirm
  # that this is the intended behavior.
  if dense:
    with tf.device("/gpu:0"):
      conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
      expected = sess.run(conv)
      t11 = time.time()
      for _ in range(num_trials):
        sess.run(conv)
      t22 = time.time()
      td = abs(t22 - t11) / max(num_trials, 1)
      print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)
    return [expected, sv3, ts, td]
def _ConstructAndTestGradientForConfig(
    self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
    padding, test_input, data_format, use_gpu):
  """Gradient-checks conv3d for one configuration and every tested dtype.

  Args:
    batch: Batch size.
    input_shape: Three ints (planes, rows, cols) of the input volume.
    filter_shape: Three ints (planes, rows, cols) of the filter.
    in_depth: Number of input channels.
    out_depth: Number of output channels.
    stride: Either a single stride used for the three middle dimensions, or
      an iterable of strides that is wrapped as [1] + list(stride) + [1].
    padding: "VALID" selects the VALID output-size formula; any other value
      uses ceil(input / stride). Also forwarded to nn_ops.conv3d.
    test_input: If true the gradient is checked with respect to the input,
      otherwise with respect to the filter.
    data_format: Forwarded to nn_ops.conv3d; "NCDHW" triggers layout
      conversion through test_util.NHWCToNCHW / NCHWToNHWC.
    use_gpu: Forwarded to self._DtypesToTest and self.cached_session.
  """
  # collections.Iterable was removed in Python 3.10; the ABC lives in
  # collections.abc. Fall back to the legacy location on Python 2.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable

  input_planes, input_rows, input_cols = input_shape
  filter_planes, filter_rows, filter_cols = filter_shape

  input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
  filter_shape = [
      filter_planes, filter_rows, filter_cols, in_depth, out_depth
  ]

  if isinstance(stride, Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]

  if padding == "VALID":
    output_planes = int(
        math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
    output_rows = int(
        math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
    output_cols = int(
        math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
  else:
    output_planes = int(math.ceil(float(input_planes) / strides[1]))
    output_rows = int(math.ceil(float(input_rows) / strides[2]))
    output_cols = int(math.ceil(float(input_cols) / strides[3]))
  output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

  input_size = 1
  for x in input_shape:
    input_size *= x
  filter_size = 1
  for x in filter_shape:
    filter_size *= x
  input_data = [x * 1.0 / input_size for x in range(0, input_size)]
  filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

  for data_type in self._DtypesToTest(use_gpu=use_gpu):
    # TODO(mjanusz): Modify gradient_checker to also provide max relative
    # error and synchronize the tolerance levels between the tests for forward
    # and backward computations.
    if data_type == dtypes.float64:
      tolerance = 1e-8
    elif data_type == dtypes.float32:
      tolerance = 5e-3
    elif data_type == dtypes.float16:
      tolerance = 1e-3

    with self.cached_session(use_gpu=use_gpu):
      orig_input_tensor = constant_op.constant(
          input_data, shape=input_shape, dtype=data_type, name="input")
      filter_tensor = constant_op.constant(
          filter_data, shape=filter_shape, dtype=data_type, name="filter")

      if data_format == "NCDHW":
        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
        new_strides = test_util.NHWCToNCHW(strides)
      else:
        input_tensor = orig_input_tensor
        new_strides = strides

      conv = nn_ops.conv3d(
          input_tensor,
          filter_tensor,
          new_strides,
          padding,
          data_format=data_format,
          name="conv")

      if data_format == "NCDHW":
        conv = test_util.NCHWToNHWC(conv)

      self.assertEqual(conv.shape, tensor_shape.TensorShape(output_shape))

      if test_input:
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            orig_input_tensor, input_shape, conv, output_shape)
      else:
        jacob_t, jacob_n = gradient_checker.compute_gradient(
            filter_tensor, filter_shape, conv, output_shape)

      if data_type != dtypes.float16:
        # Remembered so that a later float16 iteration can compare against
        # it; this relies on a non-float16 dtype being tested first.
        reference_jacob_t = jacob_t
        err = np.fabs(jacob_t - jacob_n).max()
      else:
        # Compare fp16 theoretical gradients to the previously stored
        # theoretical gradients, since fp16 numerical gradients are too
        # imprecise.
        err = np.fabs(jacob_t - reference_jacob_t).max()

    print("conv3d gradient error = ", err)
    self.assertLess(err, tolerance)
def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                          strides, dilations, padding, data_format, use_gpu,
                          err, mode):
  """Compares conv3d gradients run via with_ngraph against a reference.

  The "actual" graph is built on placeholders and executed through
  self.with_ngraph (presumably the nGraph-enabled execution path; confirm
  against that helper). The "expected" graph uses nn_ops.convolution on
  constants and is evaluated with self.evaluate.

  Args:
    input_sizes: Sequence of ints; shape of the input tensors.
    filter_sizes: Sequence of ints; shape of the filter tensors.
    output_sizes: Not used by this method.
    strides: List of three strides; wrapped as [1] + strides + [1] for conv3d.
    dilations: List of three dilation rates; wrapped like `strides`.
    padding: Padding argument forwarded to both convolutions.
    data_format: Forwarded to both convolutions.
    use_gpu: Forwarded to self.cached_session.
    err: Tolerance passed to assertArrayNear.
    mode: "input" differentiates with respect to the input, anything else
      with respect to the filter.
  """
  num_input = 1
  for dim in input_sizes:
    num_input *= dim
  num_filter = 1
  for dim in filter_sizes:
    num_filter *= dim

  # Both tensors hold the incrementing values 1.0, 2.0, ...
  input_values = [v * 1.0 for v in range(1, num_input + 1)]
  filter_values = [v * 1.0 for v in range(1, num_filter + 1)]

  # Currently not used to gate the test: the check that restricted dilated
  # cases to the GPU is disabled in this variant.
  default_dilations = (
      dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1)

  want_input_grad = mode == "input"
  with self.cached_session(use_gpu=use_gpu):
    t1_ph = tf.compat.v1.placeholder(np.float32, shape=input_sizes)
    t1 = constant_op.constant(input_values, shape=input_sizes)
    t2_ph = tf.compat.v1.placeholder(np.float32, shape=filter_sizes)
    t2 = constant_op.constant(filter_values, shape=filter_sizes)

    full_strides = [1] + strides + [1]
    full_dilations = [1] + dilations + [1]
    actual = nn_ops.conv3d(
        t1_ph,
        t2_ph,
        strides=full_strides,
        dilations=full_dilations,
        padding=padding,
        data_format=data_format)
    expected = nn_ops.convolution(
        t1,
        t2,
        padding=padding,
        strides=strides,
        dilation_rate=dilations,
        data_format=data_format)

    actual_grad = gradients_impl.gradients(
        actual, t1_ph if want_input_grad else t2_ph)[0]
    expected_grad = gradients_impl.gradients(
        expected, t1 if want_input_grad else t2)[0]

    expected_value = self.evaluate(expected_grad)

    def run_actual(sess):
      """Feeds the constants' values into the placeholder graph."""
      return sess.run(
          actual_grad, feed_dict={
              t1_ph: t1.eval(),
              t2_ph: t2.eval()
          })

    actual_value = self.with_ngraph(run_actual)

    self.assertShapeEqual(actual_value, actual_grad)
    self.assertShapeEqual(expected_value, expected_grad)

  print("expected = ", expected_value)
  print("actual = ", actual_value)
  self.assertArrayNear(expected_value.flatten(), actual_value.flatten(), err)
def _ConstructAndTestGradientForConfig(self, batch, input_shape, filter_shape,
                                       in_depth, out_depth, stride, padding,
                                       test_input, data_format, use_gpu):
    """Check conv3d gradients for one configuration across all test dtypes.

    For every dtype returned by self._DtypesToTest the theoretical Jacobian
    from gradient_checker.compute_gradient is compared with the numerical
    one. float16 is the exception: its theoretical Jacobian is compared with
    the theoretical Jacobian saved from the most recent non-float16 dtype.

    Args:
      batch: batch dimension of the input.
      input_shape: (planes, rows, cols) of the input.
      filter_shape: (planes, rows, cols) of the filter.
      in_depth: number of input channels.
      out_depth: number of output channels.
      stride: a single stride applied to all three spatial dimensions, or
        an iterable of per-dimension strides.
      padding: "VALID" selects the valid-padding output size formula; any
        other value selects ceil(input / stride).
      test_input: if truthy the gradient is taken with respect to the
        input, otherwise with respect to the filter.
      data_format: "NCDHW" converts the input and strides with
        test_util.NHWCToNCHW before the convolution and converts the
        result back; any other value uses them as built.
      use_gpu: forwarded to self._DtypesToTest and self.cached_session.

    Raises:
      ValueError: if self._DtypesToTest yields a dtype with no tolerance.
    """
    input_planes, input_rows, input_cols = input_shape
    filter_planes, filter_rows, filter_cols = filter_shape
    input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
    filter_shape = [
        filter_planes, filter_rows, filter_cols, in_depth, out_depth
    ]

    if isinstance(stride, collections_abc.Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    if padding == "VALID":
        output_planes = int(
            math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
        output_rows = int(
            math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
        output_cols = int(
            math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
    else:
        output_planes = int(math.ceil(float(input_planes) / strides[1]))
        output_rows = int(math.ceil(float(input_rows) / strides[2]))
        output_cols = int(math.ceil(float(input_cols) / strides[3]))
    output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

    input_size = 1
    for x in input_shape:
        input_size *= x
    filter_size = 1
    for x in filter_shape:
        filter_size *= x
    # Values are evenly spaced in [0, 1).
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

    # Theoretical Jacobian of the latest non-float16 run; float16 needs it.
    reference_jacob_t = None

    for data_type in self._DtypesToTest(use_gpu=use_gpu):
        # TODO(mjanusz): Modify gradient_checker to also provide max relative
        # error and synchronize the tolerance levels between the tests for
        # forward and backward computations.
        if data_type == dtypes.float64:
            tolerance = 1e-8
        elif data_type == dtypes.float32:
            tolerance = 5e-3
        elif data_type == dtypes.float16:
            tolerance = 1e-3
        else:
            # Fail clearly instead of hitting an unbound `tolerance` below.
            raise ValueError(
                "No gradient tolerance defined for dtype %r" % (data_type,))

        with self.cached_session(use_gpu=use_gpu):
            orig_input_tensor = constant_op.constant(
                input_data, shape=input_shape, dtype=data_type, name="input")
            filter_tensor = constant_op.constant(
                filter_data, shape=filter_shape, dtype=data_type,
                name="filter")

            if data_format == "NCDHW":
                input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
                new_strides = test_util.NHWCToNCHW(strides)
            else:
                input_tensor = orig_input_tensor
                new_strides = strides

            conv = nn_ops.conv3d(
                input_tensor,
                filter_tensor,
                new_strides,
                padding,
                data_format=data_format,
                name="conv")

            if data_format == "NCDHW":
                conv = test_util.NCHWToNHWC(conv)

            self.assertEqual(conv.shape,
                             tensor_shape.TensorShape(output_shape))

            if test_input:
                jacob_t, jacob_n = gradient_checker.compute_gradient(
                    orig_input_tensor, input_shape, conv, output_shape)
            else:
                jacob_t, jacob_n = gradient_checker.compute_gradient(
                    filter_tensor, filter_shape, conv, output_shape)

            if data_type != dtypes.float16:
                reference_jacob_t = jacob_t
                err = np.fabs(jacob_t - jacob_n).max()
            else:
                # Compare fp16 theoretical gradients to the reference
                # theoretical gradients, since fp16 numerical gradients are
                # too imprecise. This needs a non-fp16 dtype to have run
                # first.
                self.assertIsNotNone(
                    reference_jacob_t,
                    "float16 gradients need a higher-precision dtype to be "
                    "tested first")
                err = np.fabs(jacob_t - reference_jacob_t).max()

        print("conv3d gradient error = ", err)
        self.assertLess(err, tolerance)
def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                  expected):
    """Run conv3d for every test config and dtype and compare to `expected`.

    The convolution is built on placeholders, executed through
    self.with_ngraph with constant values fed in, and the flattened result
    is compared with `expected` via assertAllClose.

    Args:
      tensor_in_sizes: shape of the input tensor as passed by the caller.
      filter_in_sizes: shape of the filter tensor.
      stride: a single stride applied to all three spatial dimensions, or
        an iterable of per-dimension strides.
      padding: padding argument forwarded to nn_ops.conv3d.
      expected: flat sequence of expected output values.
    """
    total_size_tensor = np.prod(tensor_in_sizes)
    total_size_filter = np.prod(filter_in_sizes)

    # Initializes the tensors with evenly spaced values in (0, 1]. We keep
    # the values fairly small to avoid overflowing float16 during the conv3d.
    # The data does not depend on the config or dtype, so build it once.
    x1 = [
        f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)
    ]
    x2 = [
        f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)
    ]

    if isinstance(stride, collections_abc.Iterable):
        base_strides = [1] + list(stride) + [1]
    else:
        base_strides = [1, stride, stride, stride, 1]

    for data_format, use_gpu in GetTestConfigs():
        for dtype in self._DtypesToTest(use_gpu):
            with self.cached_session(use_gpu=use_gpu):
                t1 = constant_op.constant(
                    x1, shape=tensor_in_sizes, dtype=dtype)
                t2 = constant_op.constant(
                    x2, shape=filter_in_sizes, dtype=dtype)

                strides = base_strides
                if data_format == "NCDHW":
                    t1 = test_util.NHWCToNCHW(t1)
                    strides = test_util.NHWCToNCHW(strides)

                # The input placeholder takes its shape from t1 *after* any
                # layout conversion, so the value fed below always matches
                # the declared shape, including for NCDHW.
                t1_ph = tf.compat.v1.placeholder(dtype, shape=t1.shape)
                t2_ph = tf.compat.v1.placeholder(dtype, shape=filter_in_sizes)

                conv = nn_ops.conv3d(
                    t1_ph,
                    t2_ph,
                    strides,
                    padding=padding,
                    data_format=data_format)
                if data_format == "NCDHW":
                    conv = test_util.NCHWToNHWC(conv)

                # Invoked immediately by with_ngraph, so capturing the loop
                # variables in the closure is safe.
                sess_fn = lambda sess: sess.run(
                    conv, feed_dict={
                        t1_ph: t1.eval(),
                        t2_ph: t2.eval()
                    })
                value = self.with_ngraph(sess_fn)

                print("expected = ", expected)
                print("actual = ", value)
                tol = 1e-3 if value.dtype == np.float16 else 1e-6
                self.assertAllClose(
                    expected, value.flatten(), atol=tol, rtol=tol)
def ConstructAndTestGradient(self, batch, input_planes, input_rows,
                             input_cols, filter_planes, filter_rows,
                             filter_cols, in_depth, out_depth, stride,
                             padding, test_input):
    """Check the conv3d gradient error for one configuration.

    Builds constant input and filter tensors, runs nn_ops.conv3d and asserts
    that gradient_checker.compute_gradient_error stays below a tolerance
    that depends on whether a GPU is available.

    Args:
      batch: batch dimension of the input.
      input_planes: input size along the first spatial dimension.
      input_rows: input size along the second spatial dimension.
      input_cols: input size along the third spatial dimension.
      filter_planes: filter size along the first spatial dimension.
      filter_rows: filter size along the second spatial dimension.
      filter_cols: filter size along the third spatial dimension.
      in_depth: number of input channels.
      out_depth: number of output channels.
      stride: a single stride applied to all three spatial dimensions, or
        an iterable of per-dimension strides.
      padding: "VALID" selects the valid-padding output size formula; any
        other value selects ceil(input / stride).
      test_input: if truthy the gradient is checked with respect to the
        input, otherwise with respect to the filter.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc. Imported locally so the block is self-contained.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2 has no collections.abc
        from collections import Iterable

    input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
    filter_shape = [
        filter_planes, filter_rows, filter_cols, in_depth, out_depth
    ]

    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    if padding == "VALID":
        output_planes = int(
            math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
        output_rows = int(
            math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
        output_cols = int(
            math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
    else:
        output_planes = int(math.ceil(float(input_planes) / strides[1]))
        output_rows = int(math.ceil(float(input_rows) / strides[2]))
        output_cols = int(math.ceil(float(input_cols) / strides[3]))
    output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

    input_size = 1
    for x in input_shape:
        input_size *= x
    filter_size = 1
    for x in filter_shape:
        filter_size *= x
    # Values are evenly spaced in [0, 1).
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

    # float32 with a loose tolerance when a GPU is available, float64 with a
    # tight one otherwise. The former nested availability check could never
    # take its else branch (tolerance 8e-3), so it has been dropped.
    if test.is_gpu_available():
        data_type = dtypes.float32
        tolerance = 4e-3
    else:
        data_type = dtypes.float64
        tolerance = 1e-8

    with self.test_session(use_gpu=True):
        input_tensor = constant_op.constant(
            input_data, shape=input_shape, dtype=data_type, name="input")
        filter_tensor = constant_op.constant(
            filter_data, shape=filter_shape, dtype=data_type, name="filter")
        conv = nn_ops.conv3d(
            input_tensor, filter_tensor, strides, padding, name="conv")

        if test_input:
            err = gradient_checker.compute_gradient_error(
                input_tensor, input_shape, conv, output_shape)
        else:
            err = gradient_checker.compute_gradient_error(
                filter_tensor, filter_shape, conv, output_shape)

    print("conv3d gradient error = ", err)
    self.assertLess(err, tolerance)
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1,
                 rho_filter=1, padding='SAME', dim=5, max_density=0.1,
                 num_trials=3, filter_type="K-RELU", test_type="",
                 check_correctness=False):
    """Time the direct sparse convolution against dense conv3d on the GPU.

    Random sparse input and filter tensors are generated, converted with
    the sc_module conversion ops, and convolved once through
    sc_module.direct_sparse_conv_kd and once through nn_ops.conv3d. The mean
    run time of each and their ratio are printed.

    Args:
      tensor_in_sizes: shape of the input tensor.
      filter_in_sizes: shape of the filter tensor.
      stride: a single stride applied to all three spatial dimensions, or
        an iterable of per-dimension strides.
      rho_data: first argument to sp.createRandomSparseTensor for the input.
      rho_filter: first argument to sp.createRandomSparseTensor for the
        filter.
      padding: padding argument forwarded to the convolution ops.
      dim: forwarded to the sc_module convolution ops.
      max_density: forwarded to the sc_module convolution ops and used to
        scale the output entry count.
      num_trials: number of timed repetitions per measurement.
      filter_type: forwarded to sc_module.direct_sparse_conv_kd.
      test_type: not read by this function; kept so existing callers work.
      check_correctness: when False (the default) the function returns right
        after printing the time ratio, exactly as it did before this flag
        existed. When True it goes on to time the backprop ops and compare
        the sparse results with the dense ones element by element.
        NOTE(review): that second half used to sit behind an unconditional
        `return` and was unreachable, so treat it as untested.

    Returns:
      None when check_correctness is False. Otherwise 1 if the forward
      results disagree and 0 if they agree.
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc. Imported locally so the block is self-contained.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2 has no collections.abc
        from collections import Iterable

    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    # Output shape used for the entry budget: the input shape with the last
    # dimension replaced by the filter's last dimension.
    out_sizes = np.copy(tensor_in_sizes)
    out_sizes[-1] = filter_in_sizes[-1]
    out_entry_count = np.prod(out_sizes) * max_density

    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)

    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(
        rho_data, tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(
        rho_filter, filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Convert the data and the filter with the sc_module conversion ops.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)
    tf.reset_default_graph()

    # Sparse convolution: one untimed run, then num_trials timed runs.
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
            out_entry_count, dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(approx_scskconv)
        t5 = time.time()
        for _ in range(num_trials):
            sess.run(approx_scskconv)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)

    # Dense reference convolution, timed the same way.
    with tf.device("/gpu:0"):
        conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
        expected = sess.run(conv)
        t11 = time.time()
        for _ in range(num_trials):
            sess.run(conv)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)
    if not check_correctness:
        return None

    # Backprop timing and correctness comparison (opt-in).
    [_, sv3_bp_val, _] = sp.createRandomSparseTensor(
        1, [len(sv3.out_values)], 1, 9)
    d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                               sv3.out_block_channel_mapping[-1])
    out_backprop_val = constant_op.constant(d3_)

    t_bp1 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_filter_v2(
                d1, filter_in_sizes, out_backprop_val, strides, padding)
        res_bp1 = sess.run(fbp)
        for _ in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp1 = t_bp1 + t2 - t1
    t_bp1 = t_bp1 / float(num_trials)
    print("time bp1: ", t_bp1)

    t_bp2 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_input_v2(
                tensor_in_sizes, d2, out_backprop_val, strides, padding)
        res_bp2 = sess.run(fbp)
        for _ in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp2 = t_bp2 + t2 - t1
    t_bp2 = t_bp2 / float(num_trials)
    print("time bp2: ", t_bp2)

    t_bp3 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_conv_kd_backprop(
                pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
                pf.out_shape, pf.out_channel_mapping, sv3.out_indices,
                sv3.out_values, sv3.out_shape,
                sv3.out_block_channel_mapping, sv3_bp_val, strides, padding,
                dim, max_density)
        res_bp3 = sess.run(fbp)
        for _ in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp3 = t_bp3 + t2 - t1
    t_bp3 = t_bp3 / float(num_trials)
    print("time bp3: ", t_bp3)
    print("sparse ratio: ", t_bp3 / (t_bp2 + t_bp1))

    bp_sfg = sp.sparse1d_to_dense(pf.out_indices, res_bp3.filter_grads,
                                  pf.out_shape, pf.out_channel_mapping[-1])
    bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3.input_grads,
                                  pd.out_shape,
                                  pd.out_block_channel_mapping[-1])
    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])
    print("expected", expected)
    print("sv3", value3)
    print("out densities", sv3.out_channel_densities)

    approx_cmp = expected.flatten()
    approx = value3.flatten()
    # The original loop incremented once per entry, i.e. counted them all.
    print("entry count: ", len(approx_cmp))

    # Forward pass: a positive dense entry whose sparse counterpart is off
    # by more than 1e-3 is an error; any other non-zero sparse entry counts
    # as correct.
    has_error = False
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i, dense_entry in enumerate(approx_cmp):
        if dense_entry > 0 and abs(dense_entry - approx[i]) > 1e-3:
            if not has_error:
                first_error = i
                has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            correct_cnt = correct_cnt + 1

    # Input gradients: only non-zero sparse entries are compared, exactly.
    bp_sig_flat = bp_sig.flatten()
    res_bp2_flat = res_bp2.flatten()
    bp_i_error_cnt = 0
    bp_i_correct_cnt = 0
    for i, sparse_grad in enumerate(bp_sig_flat):
        if sparse_grad != 0:
            if sparse_grad == res_bp2_flat[i]:
                bp_i_correct_cnt = bp_i_correct_cnt + 1
            else:
                bp_i_error_cnt = bp_i_error_cnt + 1

    # Filter gradients: only positions with a non-zero filter value are
    # compared, exactly.
    filter_flat = d2.flatten()
    bp_sfg_flat = bp_sfg.flatten()
    res_bp1_flat = res_bp1.flatten()
    bp_f_error_cnt = 0
    bp_f_correct_cnt = 0
    for i, filter_entry in enumerate(filter_flat):
        if filter_entry != 0:
            if bp_sfg_flat[i] == res_bp1_flat[i]:
                bp_f_correct_cnt = bp_f_correct_cnt + 1
            else:
                bp_f_error_cnt = bp_f_error_cnt + 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("sparse input size: ", len(t1ind))
    print("total number of bpi corrects: ", bp_i_correct_cnt)
    print("sparse filter size: ", len(t2ind))
    print("total number of bpf corrects: ", bp_f_correct_cnt)
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    if bp_i_error_cnt > 0:
        print("total number of bpi errors: ", bp_i_error_cnt)
    if bp_f_error_cnt > 0:
        print("total number of bpf errors: ", bp_f_error_cnt)
    print("OK")
    return 0