def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                            padding, data_format, dtype, use_gpu):
    """Builds a conv3d op over deterministic inputs and returns its output.

    Args:
      tensor_in_sizes: Shape used for the input constant.
      filter_in_sizes: Shape used for the filter constant.
      stride: Either a single stride applied to all three middle dimensions,
        or an iterable of strides that is wrapped as `[1] + stride + [1]`.
      padding: Padding argument forwarded to `nn_ops.conv3d`.
      data_format: Forwarded to `nn_ops.conv3d`. When it is "NCDHW" the input
        and the strides are converted with `test_util.NHWCToNCHW` and the
        result is converted back with `test_util.NCHWToNHWC`.
      dtype: dtype of both constants.
      use_gpu: Forwarded to `self.cached_session`.

    Returns:
      The (unevaluated) conv3d output tensor.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    total_size_tensor = 1
    total_size_filter = 1
    for s in tensor_in_sizes:
      total_size_tensor *= s
    for s in filter_in_sizes:
      total_size_filter *= s

    # Fill both tensors with evenly spaced values in (0, 1]. The values are
    # kept small to avoid overflowing float16 during the conv3d.
    x1 = [f * 1.0 / total_size_tensor for f in range(1, total_size_tensor + 1)]
    x2 = [f * 1.0 / total_size_filter for f in range(1, total_size_filter + 1)]
    with self.cached_session(use_gpu=use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)

      if isinstance(stride, collections_abc.Iterable):
        strides = [1] + list(stride) + [1]
      else:
        strides = [1, stride, stride, stride, 1]

      if data_format == "NCDHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
      conv = nn_ops.conv3d(t1, t2, strides, padding=padding,
                           data_format=data_format)
      if data_format == "NCDHW":
        conv = test_util.NCHWToNHWC(conv)

      return conv
 def testGradient(self):
   """Checks the numeric gradient of conv3d_backprop_filter_v2.

   Runs for every combination of "SAME"/"VALID" padding and stride 1/2, and
   asserts that the gradient error reported by `gradient_checker` stays
   below 1e-3.
   """

   def _uniform_constant(shape):
     # Float32 constant with samples drawn from [-1, 1).
     return constant_op.constant(
         2 * np.random.random_sample(shape) - 1, dtype=dtypes.float32)

   input_dims = [2, 4, 3, 3, 2]
   kernel_dims = [3, 3, 3, 2, 3]
   with self.test_session():
     for padding in ["SAME", "VALID"]:
       for stride in [1, 2]:
         # Re-seed so every configuration sees the same random stream.
         np.random.seed(1)
         in_val = _uniform_constant(input_dims)
         strides = [1, stride, stride, stride, 1]
         # A throwaway forward convolution is the easiest way to learn the
         # output shape for the current settings.
         probe = nn_ops.conv3d(in_val, array_ops.zeros(kernel_dims), strides,
                               padding)
         grad_dims = probe.get_shape().as_list()
         out_backprop_val = _uniform_constant(grad_dims)
         filter_grad = nn_ops.conv3d_backprop_filter_v2(
             in_val, kernel_dims, out_backprop_val, strides, padding)
         err = gradient_checker.compute_gradient_error(
             [in_val, out_backprop_val], [input_dims, grad_dims],
             filter_grad, kernel_dims)
         print("conv3d_backprop_filter gradient err = %g " % err)
         self.assertLess(err, 1e-3)
  def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                            strides, dilations, padding, data_format, use_gpu,
                            err, mode):
    """Compares conv3d gradients against `nn_ops.convolution` gradients.

    Args:
      input_sizes: Shape of the input tensor (converted with
        `test_util.NHWCToNCHW` when `data_format` is "NCDHW").
      filter_sizes: Shape of the filter tensor.
      output_sizes: Not read by this method.
      strides: List of three strides.
      dilations: List of three dilation rates.
      padding: Padding argument forwarded to both convolutions.
      data_format: Data format forwarded to both convolutions.
      use_gpu: Forwarded to `self.cached_session`.
      err: Tolerance passed to `assertArrayNear`.
      mode: "input" differentiates w.r.t. the input tensor; any other value
        differentiates w.r.t. the filter tensor.
    """
    input_count = 1
    for dim in input_sizes:
      input_count *= dim
    filter_count = 1
    for dim in filter_sizes:
      filter_count *= dim
    # Both tensors hold the incrementing values 1.0, 2.0, ...
    input_values = [n * 1.0 for n in range(1, input_count + 1)]
    filter_values = [n * 1.0 for n in range(1, filter_count + 1)]

    all_unit_dilations = (
        dilations[0] == 1 and dilations[1] == 1 and dilations[2] == 1)
    # Non-default dilation rates are exercised only on the GPU because there
    # is currently no CPU implementation for arbitrary dilation rates.
    if not all_unit_dilations and not use_gpu:
      return

    channels_first = data_format == "NCDHW"
    with self.cached_session(use_gpu=use_gpu):
      if channels_first:
        input_sizes = test_util.NHWCToNCHW(input_sizes)
      t1 = constant_op.constant(input_values, shape=input_sizes)
      t2 = constant_op.constant(filter_values, shape=filter_sizes)
      full_strides = [1] + strides + [1]
      full_dilations = [1] + dilations + [1]
      if channels_first:
        full_strides = test_util.NHWCToNCHW(full_strides)
        full_dilations = test_util.NHWCToNCHW(full_dilations)
      actual = nn_ops.conv3d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilations,
          padding=padding,
          data_format=data_format)
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilations,
          data_format=data_format)
      if channels_first:
        actual = test_util.NCHWToNHWC(actual)
        expected = test_util.NCHWToNHWC(expected)

      # Differentiate both outputs with respect to the same tensor.
      wrt = t1 if mode == "input" else t2
      actual_grad = gradients_impl.gradients(actual, wrt)[0]
      expected_grad = gradients_impl.gradients(expected, wrt)[0]
      actual_value = self.evaluate(actual_grad)
      expected_value = self.evaluate(expected_grad)
      self.assertShapeEqual(actual_value, actual_grad)
      self.assertShapeEqual(expected_value, expected_grad)

    print("expected = ", expected_value)
    print("actual = ", actual_value)
    self.assertArrayNear(expected_value.flatten(), actual_value.flatten(),
                         err)
  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                    expected):
    """Runs conv3d on incrementing inputs and compares with `expected`.

    Args:
      tensor_in_sizes: Shape used for the input constant.
      filter_in_sizes: Shape used for the filter constant.
      stride: Either a single stride applied to all three middle dimensions,
        or an iterable of strides that is wrapped as `[1] + stride + [1]`.
      padding: Padding argument forwarded to `nn_ops.conv3d`.
      expected: Flat sequence of expected output values; compared against the
        flattened result with an absolute tolerance of 1e-5.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s

    if isinstance(stride, collections_abc.Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    with self.test_session(use_gpu=True) as sess:
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      conv = nn_ops.conv3d(t1, t2, strides, padding=padding)
      value = sess.run(conv)
    print("expected = ", expected)
    print("actual = ", value)
    self.assertArrayNear(expected, value.flatten(), 1e-5)
  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                            padding, data_format, use_gpu):
    """Builds a conv3d op over incrementing inputs and returns its output.

    Args:
      tensor_in_sizes: Shape used for the input constant.
      filter_in_sizes: Shape used for the filter constant.
      stride: Either a single stride applied to all three middle dimensions,
        or an iterable of strides that is wrapped as `[1] + stride + [1]`.
      padding: Padding argument forwarded to `nn_ops.conv3d`.
      data_format: Forwarded to `nn_ops.conv3d`. When it is "NCDHW" the input
        and the strides are converted with `test_util.NHWCToNCHW` and the
        result is converted back with `test_util.NCHWToNHWC`.
      use_gpu: Forwarded to `self.test_session`.

    Returns:
      The (unevaluated) conv3d output tensor.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    total_size_1 = 1
    total_size_2 = 1
    for s in tensor_in_sizes:
      total_size_1 *= s
    for s in filter_in_sizes:
      total_size_2 *= s

    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, total_size_1 + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_2 + 1)]
    with self.test_session(use_gpu=use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)

      if isinstance(stride, collections_abc.Iterable):
        strides = [1] + list(stride) + [1]
      else:
        strides = [1, stride, stride, stride, 1]

      if data_format == "NCDHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
      conv = nn_ops.conv3d(t1, t2, strides, padding=padding,
                           data_format=data_format)
      if data_format == "NCDHW":
        conv = test_util.NCHWToNHWC(conv)

      return conv
示例#6
0
def _Conv3DBackpropInputGrad(op, grad):
  """Builds the gradients of a conv3d backprop-input op.

  Args:
    op: The op being differentiated; its "strides" and "padding" attributes
      and its inputs 1 and 2 are read.
    grad: Gradient flowing into the op's output.

  Returns:
    A three-element list: None, the result of `conv3d_backprop_filter_v2`,
    and the result of `conv3d` applied to `grad` and `op.inputs[1]`.
  """
  conv_strides = op.get_attr("strides")
  conv_padding = op.get_attr("padding")
  kernel = op.inputs[1]

  kernel_grad = nn_ops.conv3d_backprop_filter_v2(
      grad,
      array_ops.shape(kernel),
      op.inputs[2],
      strides=conv_strides,
      padding=conv_padding)
  backprop_grad = nn_ops.conv3d(
      grad, kernel, strides=conv_strides, padding=conv_padding)
  return [None, kernel_grad, backprop_grad]
示例#7
0
def _Conv3DBackpropFilterGrad(op, grad):
  """Builds the gradients of a conv3d backprop-filter op.

  Args:
    op: The op being differentiated; its "strides" and "padding" attributes
      and its inputs 0 and 2 are read.
    grad: Gradient flowing into the op's output.

  Returns:
    A three-element list: the result of `conv3d_backprop_input_v2`, None,
    and the result of `conv3d` applied to `op.inputs[0]` and `grad`.
  """
  conv_strides = op.get_attr("strides")
  conv_padding = op.get_attr("padding")
  conv_input = op.inputs[0]

  input_grad = nn_ops.conv3d_backprop_input_v2(
      array_ops.shape(conv_input),
      grad,
      op.inputs[2],
      strides=conv_strides,
      padding=conv_padding)
  backprop_grad = nn_ops.conv3d(
      conv_input, grad, strides=conv_strides, padding=conv_padding)
  return [input_grad, None, backprop_grad]
示例#8
0
def _Conv3DBackpropInputGrad(op, grad):
    """Builds the gradients of a conv3d backprop-input op.

    The op's "data_format", "strides" and "padding" attributes are forwarded
    unchanged to both gradient ops.

    Args:
      op: The op being differentiated; inputs 1 and 2 are read.
      grad: Gradient flowing into the op's output.

    Returns:
      A three-element list: None, the result of `conv3d_backprop_filter_v2`,
      and the result of `conv3d` applied to `grad` and `op.inputs[1]`.
    """
    data_format = op.get_attr("data_format")
    conv_strides = op.get_attr("strides")
    conv_padding = op.get_attr("padding")
    kernel = op.inputs[1]

    kernel_grad = nn_ops.conv3d_backprop_filter_v2(
        grad,
        array_ops.shape(kernel),
        op.inputs[2],
        strides=conv_strides,
        padding=conv_padding,
        data_format=data_format)
    backprop_grad = nn_ops.conv3d(
        grad,
        kernel,
        strides=conv_strides,
        padding=conv_padding,
        data_format=data_format)
    return [None, kernel_grad, backprop_grad]
 def testForward(self):
   """Asserts that a dilated NCDHW conv3d forward pass is reproducible.

   Builds random input and filter ops via `self._random_data_op`, runs them
   through `nn_ops.conv3d`, and hands the result to
   `self._assert_reproducible`.
   """
   input_layout = LayerShapeNCDHW(
       batch=2, channels=3, depth=5, height=7, width=6)
   kernel_layout = FilterShape3D(
       depth=3, height=3, width=3, in_channels=3, out_channels=2)
   input_op = self._random_data_op(input_layout)
   kernel_op = self._random_data_op(kernel_layout)
   conv_kwargs = dict(
       strides=[1, 1, 1, 1, 1],
       padding='VALID',
       data_format='NCDHW',
       dilations=[1, 1, 2, 2, 2])
   self._assert_reproducible(
       nn_ops.conv3d(input_op, kernel_op, **conv_kwargs))
示例#10
0
  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                   stride, dilation, padding, data_format,
                                   use_gpu):
    """Builds a dilated conv3d and its `nn_ops.convolution` reference.

    Args:
      tensor_in_sizes: Shape used for the input constant.
      filter_in_sizes: Shape used for the filter constant.
      stride: Either a single stride applied to all three spatial dimensions,
        or an iterable of three strides.
      dilation: List of dilation rates; concatenated with list literals, so
        it must be a `list`.
      padding: Padding argument forwarded to both convolutions.
      data_format: Data format forwarded to both convolutions. For "NCDHW"
        the input is converted with `test_util.NHWCToNCHW` and both results
        are converted back with `test_util.NCHWToNHWC`.
      use_gpu: Forwarded to `self.cached_session`.

    Returns:
      A tuple `(expected, computed)` of unevaluated tensors, where `expected`
      comes from `nn_ops.convolution` and `computed` from `nn_ops.conv3d`.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    total_size_tensor = 1
    total_size_filter = 1
    for s in tensor_in_sizes:
      total_size_tensor *= s
    for s in filter_in_sizes:
      total_size_filter *= s

    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_filter + 1)]
    with self.cached_session(use_gpu=use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      if isinstance(stride, collections_abc.Iterable):
        strides = list(stride)
      else:
        strides = [stride, stride, stride]
      # conv3d takes five-element strides/dilations; the position of the two
      # unit entries depends on the data format.
      if data_format == "NCDHW":
        t1 = test_util.NHWCToNCHW(t1)
        full_strides = [1, 1] + strides
        full_dilation = [1, 1] + dilation
      else:
        full_strides = [1] + strides + [1]
        full_dilation = [1] + dilation + [1]
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilation,
          data_format=data_format)
      computed = nn_ops.conv3d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilation,
          padding=padding,
          data_format=data_format)
      if data_format == "NCDHW":
        expected = test_util.NCHWToNHWC(expected)
        computed = test_util.NCHWToNHWC(computed)
    return expected, computed
示例#11
0
  def _ComputeReferenceDilatedConv(self, tensor_in_sizes, filter_in_sizes,
                                   stride, dilation, padding, data_format,
                                   use_gpu):
    """Builds a dilated conv3d and its `nn_ops.convolution` reference.

    Args:
      tensor_in_sizes: Shape used for the input constant.
      filter_in_sizes: Shape used for the filter constant.
      stride: Either a single stride applied to all three spatial dimensions,
        or an iterable of three strides.
      dilation: List of dilation rates; concatenated with list literals, so
        it must be a `list`.
      padding: Padding argument forwarded to both convolutions.
      data_format: Data format forwarded to both convolutions. For "NCDHW"
        the input is converted with `test_util.NHWCToNCHW` and both results
        are converted back with `test_util.NCHWToNHWC`.
      use_gpu: Forwarded to `self.cached_session`.

    Returns:
      A tuple `(expected, computed)` of unevaluated tensors, where `expected`
      comes from `nn_ops.convolution` and `computed` from `nn_ops.conv3d`.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    total_size_tensor = 1
    total_size_filter = 1
    for s in tensor_in_sizes:
      total_size_tensor *= s
    for s in filter_in_sizes:
      total_size_filter *= s

    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, total_size_tensor + 1)]
    x2 = [f * 1.0 for f in range(1, total_size_filter + 1)]
    with self.cached_session(use_gpu=use_gpu):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes)
      t2 = constant_op.constant(x2, shape=filter_in_sizes)
      if isinstance(stride, collections_abc.Iterable):
        strides = list(stride)
      else:
        strides = [stride, stride, stride]
      # conv3d takes five-element strides/dilations; the position of the two
      # unit entries depends on the data format.
      if data_format == "NCDHW":
        t1 = test_util.NHWCToNCHW(t1)
        full_strides = [1, 1] + strides
        full_dilation = [1, 1] + dilation
      else:
        full_strides = [1] + strides + [1]
        full_dilation = [1] + dilation + [1]
      expected = nn_ops.convolution(
          t1,
          t2,
          padding=padding,
          strides=strides,
          dilation_rate=dilation,
          data_format=data_format)
      computed = nn_ops.conv3d(
          t1,
          t2,
          strides=full_strides,
          dilations=full_dilation,
          padding=padding,
          data_format=data_format)
      if data_format == "NCDHW":
        expected = test_util.NCHWToNHWC(expected)
        computed = test_util.NCHWToNHWC(computed)
    return expected, computed
示例#12
0
def _Conv3DBackpropFilterGrad(op, grad):
    """Builds the gradients of a conv3d backprop-filter op.

    The op's "dilations", "strides", "padding" and (decoded) "data_format"
    attributes are forwarded to both gradient ops.

    Args:
      op: The op being differentiated; inputs 0 and 2 are read.
      grad: Gradient flowing into the op's output.

    Returns:
      A three-element list: the result of `conv3d_backprop_input_v2`, None,
      and the result of `conv3d` applied to `op.inputs[0]` and `grad`.
    """
    # The attribute value is decoded before being handed to the Python API.
    data_format = op.get_attr("data_format").decode()
    conv_dilations = op.get_attr("dilations")
    conv_strides = op.get_attr("strides")
    conv_padding = op.get_attr("padding")
    conv_input = op.inputs[0]

    input_grad = nn_ops.conv3d_backprop_input_v2(
        array_ops.shape(conv_input),
        grad,
        op.inputs[2],
        dilations=conv_dilations,
        strides=conv_strides,
        padding=conv_padding,
        data_format=data_format)
    backprop_grad = nn_ops.conv3d(
        conv_input,
        grad,
        dilations=conv_dilations,
        strides=conv_strides,
        padding=conv_padding,
        data_format=data_format)
    return [input_grad, None, backprop_grad]
示例#13
0
def _Conv3DBackpropFilterGrad(op, grad):
  """Builds the gradients of a conv3d backprop-filter op.

  The op's "dilations", "strides", "padding" and (decoded) "data_format"
  attributes are forwarded to both gradient ops.

  Args:
    op: The op being differentiated; inputs 0 and 2 are read.
    grad: Gradient flowing into the op's output.

  Returns:
    A three-element list: the result of `conv3d_backprop_input_v2`, None,
    and the result of `conv3d` applied to `op.inputs[0]` and `grad`.
  """
  # The attribute value is decoded before being handed to the Python API.
  data_format = op.get_attr("data_format").decode()
  conv_dilations = op.get_attr("dilations")
  conv_strides = op.get_attr("strides")
  conv_padding = op.get_attr("padding")
  conv_input = op.inputs[0]

  input_grad = nn_ops.conv3d_backprop_input_v2(
      array_ops.shape(conv_input),
      grad,
      op.inputs[2],
      dilations=conv_dilations,
      strides=conv_strides,
      padding=conv_padding,
      data_format=data_format)
  backprop_grad = nn_ops.conv3d(
      conv_input,
      grad,
      dilations=conv_dilations,
      strides=conv_strides,
      padding=conv_padding,
      data_format=data_format)
  return [input_grad, None, backprop_grad]
示例#14
0
def _Conv3DBackpropInputGrad(op, grad):
  """Builds the gradients of a conv3d backprop-input op.

  The op's "dilations", "strides", "padding" and (decoded) "data_format"
  attributes are forwarded to both gradient ops.

  Args:
    op: The op being differentiated; inputs 1 and 2 are read.
    grad: Gradient flowing into the op's output.

  Returns:
    A three-element list: None, the result of `conv3d_backprop_filter_v2`,
    and the result of `conv3d` applied to `grad` and `op.inputs[1]`.
  """
  # The attribute value is decoded before being handed to the Python API.
  data_format = op.get_attr("data_format").decode()
  conv_dilations = op.get_attr("dilations")
  conv_strides = op.get_attr("strides")
  conv_padding = op.get_attr("padding")
  kernel = op.inputs[1]

  kernel_grad = nn_ops.conv3d_backprop_filter_v2(
      grad,
      array_ops.shape(kernel),
      op.inputs[2],
      dilations=conv_dilations,
      strides=conv_strides,
      padding=conv_padding,
      data_format=data_format)
  backprop_grad = nn_ops.conv3d(
      grad,
      kernel,
      dilations=conv_dilations,
      strides=conv_strides,
      padding=conv_padding,
      data_format=data_format)
  return [None, kernel_grad, backprop_grad]
示例#15
0
    def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, stride,
                              padding, data_format, dtype, use_gpu):
        """Builds a conv3d op over deterministic inputs and returns its output.

        Args:
          tensor_in_sizes: Shape used for the input constant.
          filter_in_sizes: Shape used for the filter constant.
          stride: Either a single stride applied to all three middle
            dimensions, or an iterable of strides that is wrapped as
            `[1] + stride + [1]`.
          padding: Padding argument forwarded to `nn_ops.conv3d`.
          data_format: Forwarded to `nn_ops.conv3d`. When it is "NCDHW" the
            input and the strides are converted with `test_util.NHWCToNCHW`
            and the result is converted back with `test_util.NCHWToNHWC`.
          dtype: dtype of both constants.
          use_gpu: Forwarded to `self.cached_session`.

        Returns:
          The (unevaluated) conv3d output tensor.
        """
        # `collections.Iterable` was removed in Python 3.10; the ABC lives in
        # `collections.abc`.
        from collections import abc as collections_abc

        total_size_tensor = 1
        total_size_filter = 1
        for s in tensor_in_sizes:
            total_size_tensor *= s
        for s in filter_in_sizes:
            total_size_filter *= s

        # Fill both tensors with evenly spaced values in (0, 1]. The values
        # are kept small to avoid overflowing float16 during the conv3d.
        x1 = [
            f * 1.0 / total_size_tensor
            for f in range(1, total_size_tensor + 1)
        ]
        x2 = [
            f * 1.0 / total_size_filter
            for f in range(1, total_size_filter + 1)
        ]
        with self.cached_session(use_gpu=use_gpu):
            t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
            t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)

            if isinstance(stride, collections_abc.Iterable):
                strides = [1] + list(stride) + [1]
            else:
                strides = [1, stride, stride, stride, 1]

            if data_format == "NCDHW":
                t1 = test_util.NHWCToNCHW(t1)
                strides = test_util.NHWCToNCHW(strides)
            conv = nn_ops.conv3d(t1,
                                 t2,
                                 strides,
                                 padding=padding,
                                 data_format=data_format)
            if data_format == "NCDHW":
                conv = test_util.NCHWToNHWC(conv)

            return conv
示例#16
0
    def loop_body(x, y): # pylint: disable=unused-argument
      """Runs one fill-and-detect iteration over the mask.

      Convolves `x` with the enclosing-scope filter `filt` (2-D when
      `total_dims` is 2, 3-D otherwise) and marks every position whose
      response is at least `threshold` as a hole.

      Args:
        x: The mask `Tensor` with values 0 or 1 to be filled.
        y: Second loop variable; not read by this body.

      Returns:
        A list of two elements:
        1) `x` with the detected holes added to it.
        2) A `Tensor` shaped like `x` holding ones at the hole locations
           and zeros elsewhere.
      """
      if total_dims == 2:
        response = nn_ops.conv2d(
            x, filt, strides=[1, 1, 1, 1], padding="SAME")
      else:
        response = nn_ops.conv3d(
            x, filt, strides=[1, 1, 1, 1, 1], padding="SAME")
      reaches_threshold = response >= threshold
      holes = array_ops.where(reaches_threshold,
                              array_ops.ones_like(x),
                              array_ops.zeros_like(x))
      filled = x + holes
      return [filled, holes]
示例#17
0
    def test3DConv8x8x8_WithBias(self):
        """Runs an 8x8x8, stride-4 conv3d plus bias on the IPU device.

        Feeds all-zero tensors, checks that the output is all zeros with
        shape [1, 20, 20, 20, 4], and checks the compute sets found in the
        event trace against a list of allowed name patterns.
        """
        input_shape = [1, 84, 84, 84, 2]
        kernel_shape = [8, 8, 8, 2, 4]
        bias_shape = [4]

        with ops.device("/device:IPU:0"):
            inp = array_ops.placeholder(np.float32, input_shape, name="inp")
            wei = array_ops.placeholder(np.float32, kernel_shape, name="wei")
            bia = array_ops.placeholder(np.float32, bias_shape, name="bia")
            conv_out = nn_ops.conv3d(inp,
                                     wei,
                                     strides=[1, 4, 4, 4, 1],
                                     padding="VALID")
            output = nn_ops.bias_add(conv_out, bia)

        with ops.device('cpu'):
            report = gen_ipu_ops.ipu_event_trace()

        tu.configure_ipu_system()

        with tu.ipu_session() as sess:
            # NOTE(review): presumably drains events recorded before the run
            # under test - confirm against ipu_event_trace semantics.
            sess.run(report)

            feeds = {
                inp: np.zeros(input_shape),
                wei: np.zeros(kernel_shape),
                bia: np.zeros(bias_shape),
            }
            conv_value = sess.run(output, feeds)
            self.assertAllClose(conv_value, np.zeros([1, 20, 20, 20, 4]))

            trace = sess.run(report)
            trace_strings = tu.extract_all_strings_from_event_trace(trace)
            cs_list = tu.get_compute_sets_from_report(trace_strings)

            allowed_compute_sets = [
                '__seed*',
                'host-exchange-local-copy-',
                'Copy_',
                'Conv3D/convolution.*/Conv_8x8x8_stride4x4x4',
                'BiasAdd/fusion/addToChannel',
            ]
            self.assertTrue(
                tu.check_all_compute_sets_and_list(cs_list,
                                                   allowed_compute_sets))
示例#18
0
    def test3Dimension(self):
        """Checks one output position of a 1x1x1 conv3d against known values.

        Both the input and the filter hold evenly spaced float32 values in
        (0, 1]; the eight output channels at position [0, 1, 1, 1] are
        compared with precomputed numbers.
        """

        def _ramp_constant(shape):
            # Float32 constant holding 1/n, 2/n, ..., 1 where n is the
            # element count of `shape`.
            count = 1
            for dim in shape:
                count *= dim
            values = [i * 1.0 / count for i in range(1, count + 1)]
            return constant_op.constant(values,
                                        shape=shape,
                                        dtype=dtypes.float32)

        with self.cached_session():
            a = _ramp_constant([8, 16, 16, 16, 8])
            f = _ramp_constant([1, 1, 1, 8, 8])

            conv_t = nn_ops.conv3d(a,
                                   filter=f,
                                   strides=[1, 1, 1, 1, 1],
                                   padding="VALID")
            # Take all eight channels at batch 0, position (1, 1, 1).
            begin = [0, 1, 1, 1, 0]
            size = [1, 1, 1, 1, 8]
            result = self.evaluate(array_ops.slice(conv_t, begin, size))

            expected = [
                0.03028321, 0.03132677, 0.03237033, 0.03341389, 0.03445745,
                0.035501, 0.03654456, 0.03758812
            ]
            self.assertAllClose(expected, result.flatten(), rtol=1e-6)
  def _ConstructAndTestGradientForConfig(
      self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
      padding, test_input, data_format, use_gpu):
    """Checks the numeric gradient of conv3d for one configuration.

    Args:
      batch: Batch size.
      input_shape: Three-element (planes, rows, cols) input size.
      filter_shape: Three-element (planes, rows, cols) filter size.
      in_depth: Number of input channels.
      out_depth: Number of output channels.
      stride: Either a single stride applied to all three spatial dimensions,
        or an iterable of strides that is wrapped as `[1] + stride + [1]`.
      padding: "VALID" selects the VALID output-size formula; any other value
        uses the ceil(input / stride) formula.
      test_input: If true the gradient w.r.t. the input is checked, otherwise
        the gradient w.r.t. the filter.
      data_format: Forwarded to `nn_ops.conv3d`; "NCDHW" triggers layout
        conversion of the input, strides and output.
      use_gpu: Forwarded to `self.test_session`; also selects float32 with a
        looser tolerance when a GPU is available.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    input_planes, input_rows, input_cols = input_shape
    filter_planes, filter_rows, filter_cols = filter_shape

    input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
    filter_shape = [
        filter_planes, filter_rows, filter_cols, in_depth, out_depth
    ]

    if isinstance(stride, collections_abc.Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    if padding == "VALID":
      output_planes = int(
          math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
      output_rows = int(
          math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
      output_cols = int(
          math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
    else:
      output_planes = int(math.ceil(float(input_planes) / strides[1]))
      output_rows = int(math.ceil(float(input_rows) / strides[2]))
      output_cols = int(math.ceil(float(input_cols) / strides[3]))
    output_shape = [batch, output_planes, output_rows, output_cols, out_depth]
    input_size = 1
    for x in input_shape:
      input_size *= x
    filter_size = 1
    for x in filter_shape:
      filter_size *= x
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

    # The original nested a second `is_gpu_available()` check here whose
    # `else` branch (tolerance 8e-3) could never run; it has been removed.
    if test.is_gpu_available() and use_gpu:
      data_type = dtypes.float32
      tolerance = 4e-3
    else:
      data_type = dtypes.float64
      tolerance = 1e-8
    with self.test_session(use_gpu=use_gpu):
      orig_input_tensor = constant_op.constant(
          input_data, shape=input_shape, dtype=data_type, name="input")
      filter_tensor = constant_op.constant(
          filter_data, shape=filter_shape, dtype=data_type, name="filter")

      if data_format == "NCDHW":
        input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
        strides = test_util.NHWCToNCHW(strides)
      else:
        input_tensor = orig_input_tensor

      conv = nn_ops.conv3d(
          input_tensor, filter_tensor, strides, padding,
          data_format=data_format, name="conv")

      if data_format == "NCDHW":
        conv = test_util.NCHWToNHWC(conv)

      if test_input:
        err = gradient_checker.compute_gradient_error(orig_input_tensor,
                                                      input_shape,
                                                      conv, output_shape)
      else:
        err = gradient_checker.compute_gradient_error(filter_tensor,
                                                      filter_shape, conv,
                                                      output_shape)
    print("conv3d gradient error = ", err)
    self.assertLess(err, tolerance)
示例#20
0
    def _ConstructAndTestGradientForConfig(self, batch, input_shape,
                                           filter_shape, in_depth, out_depth,
                                           stride, padding, test_input,
                                           data_format, use_gpu):
        """Checks the numeric gradient of conv3d for one configuration.

        Args:
          batch: Batch size.
          input_shape: Three-element (planes, rows, cols) input size.
          filter_shape: Three-element (planes, rows, cols) filter size.
          in_depth: Number of input channels.
          out_depth: Number of output channels.
          stride: Either a single stride applied to all three spatial
            dimensions, or an iterable of strides that is wrapped as
            `[1] + stride + [1]`.
          padding: "VALID" selects the VALID output-size formula; any other
            value uses the ceil(input / stride) formula.
          test_input: If true the gradient w.r.t. the input is checked,
            otherwise the gradient w.r.t. the filter.
          data_format: Forwarded to `nn_ops.conv3d`; "NCDHW" triggers layout
            conversion of the input, strides and output.
          use_gpu: Forwarded to `self.test_session`; also selects float32
            with a looser tolerance when a GPU is available.
        """
        # `collections.Iterable` was removed in Python 3.10; the ABC lives in
        # `collections.abc`.
        from collections import abc as collections_abc

        input_planes, input_rows, input_cols = input_shape
        filter_planes, filter_rows, filter_cols = filter_shape

        input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
        filter_shape = [
            filter_planes, filter_rows, filter_cols, in_depth, out_depth
        ]

        if isinstance(stride, collections_abc.Iterable):
            strides = [1] + list(stride) + [1]
        else:
            strides = [1, stride, stride, stride, 1]

        if padding == "VALID":
            output_planes = int(
                math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
            output_rows = int(
                math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
            output_cols = int(
                math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
        else:
            output_planes = int(math.ceil(float(input_planes) / strides[1]))
            output_rows = int(math.ceil(float(input_rows) / strides[2]))
            output_cols = int(math.ceil(float(input_cols) / strides[3]))
        output_shape = [
            batch, output_planes, output_rows, output_cols, out_depth
        ]
        input_size = 1
        for x in input_shape:
            input_size *= x
        filter_size = 1
        for x in filter_shape:
            filter_size *= x
        input_data = [x * 1.0 / input_size for x in range(0, input_size)]
        filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

        # The original nested a second `is_gpu_available()` check here whose
        # `else` branch (tolerance 8e-3) could never run; it has been removed.
        if test.is_gpu_available() and use_gpu:
            data_type = dtypes.float32
            # TODO(mjanusz): Modify gradient_checker to also provide max relative
            # error and synchronize the tolerance levels between the tests for forward
            # and backward computations.
            tolerance = 5e-3
        else:
            data_type = dtypes.float64
            tolerance = 1e-8
        with self.test_session(use_gpu=use_gpu):
            orig_input_tensor = constant_op.constant(input_data,
                                                     shape=input_shape,
                                                     dtype=data_type,
                                                     name="input")
            filter_tensor = constant_op.constant(filter_data,
                                                 shape=filter_shape,
                                                 dtype=data_type,
                                                 name="filter")

            if data_format == "NCDHW":
                input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
                strides = test_util.NHWCToNCHW(strides)
            else:
                input_tensor = orig_input_tensor

            conv = nn_ops.conv3d(input_tensor,
                                 filter_tensor,
                                 strides,
                                 padding,
                                 data_format=data_format,
                                 name="conv")

            if data_format == "NCDHW":
                conv = test_util.NCHWToNHWC(conv)

            if test_input:
                err = gradient_checker.compute_gradient_error(
                    orig_input_tensor, input_shape, conv, output_shape)
            else:
                err = gradient_checker.compute_gradient_error(
                    filter_tensor, filter_shape, conv, output_shape)
        print("conv3d gradient error = ", err)
        self.assertLess(err, tolerance)
示例#21
0
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type="K-RELU",
                 test_type=""):
    """Compares a three-layer direct sparse convolution with dense conv3d.

    Random sparse input and filters are generated, pushed through three
    chained `direct_sparse_conv_kd` ops and through three chained
    `nn_ops.conv3d` ops (each followed by ReLU when `filter_type` is
    "K-RELU"), both on "/gpu:0". Both pipelines are timed and their outputs
    compared entry by entry.

    Args:
      tensor_in_sizes: Shape of the input tensor.
      filter_in_sizes: Shape of the first filter; must be a list because it
        is copied by slicing. The second and third filters use the same
        shape with the second-to-last entry set to the last entry.
      stride: Either a single stride applied to all three middle dimensions,
        or an iterable of strides that is wrapped as `[1] + stride + [1]`.
      rho_data: First argument to `sp.createRandomSparseTensor` for the
        input (presumably its density - confirm against that helper).
      rho_filter: Same, for the filters.
      padding: Padding argument forwarded to both pipelines.
      dim: Forwarded to `direct_sparse_conv_kd`.
      max_density: Forwarded to `direct_sparse_conv_kd`.
      num_trials: Number of timed runs per pipeline, after one untimed run.
      filter_type: Forwarded to `direct_sparse_conv_kd`; "K-RELU" also adds
        ReLU after each dense convolution.
      test_type: Not read by this function.

    Returns:
      0 when every entry of the two outputs agrees within 1e-3, else 1.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections import abc as collections_abc

    if isinstance(stride, collections_abc.Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)

    # s1..s4 are not read again in this function; their construction is
    # kept as in the original.
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    s1 = tf.SparseTensor(indices=t1ind, values=t1val, dense_shape=t1sh)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes, -3, 3)
    s2 = tf.SparseTensor(indices=t2ind, values=t2val, dense_shape=t2sh)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    # Later layers consume the previous layer's output channels, so their
    # input-channel entry is set to the output-channel count.
    filter_in_sizes2 = filter_in_sizes[:]
    filter_in_sizes2[-2] = filter_in_sizes2[-1]
    [t3ind, t3val, t3sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes2, -3, 3)
    s3 = tf.SparseTensor(indices=t3ind, values=t3val, dense_shape=t3sh)
    d3 = sp.sparse_to_dense(t3ind, t3val, t3sh)

    [t4ind, t4val, t4sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes2, -3, 3)
    s4 = tf.SparseTensor(indices=t4ind, values=t4val, dense_shape=t4sh)
    d4 = sp.sparse_to_dense(t4ind, t4val, t4sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4

    # Stage 1: convert the data and the filters to the sparse layout.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
        convf2 = sc_module.direct_sparse_filter_conversion(
            t3ind, t3val, t3sh, t3sh)
        convf3 = sc_module.direct_sparse_filter_conversion(
            t4ind, t4val, t4sh, t4sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)
        pf2 = sess.run(convf2)
        pf3 = sess.run(convf3)

    tf.reset_default_graph()

    # Stage 2: three chained sparse convolutions.
    with tf.device("/gpu:0"):
        net = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding, dim,
            max_density, filter_type)
        net = sc_module.direct_sparse_conv_kd(
            net.out_indices, net.out_values, net.out_shape,
            net.out_block_channel_mapping, pf2.out_indices, pf2.out_values,
            pf2.out_shape, pf2.out_channel_mapping, bias, strides, padding,
            dim, max_density, filter_type)
        net = sc_module.direct_sparse_conv_kd(
            net.out_indices, net.out_values, net.out_shape,
            net.out_block_channel_mapping, pf3.out_indices, pf3.out_values,
            pf3.out_shape, pf3.out_channel_mapping, bias, strides, padding,
            dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        # The first run produces the result and is not timed.
        sv3 = sess.run(net)
        t5 = time.time()
        for _ in range(num_trials):
            sess.run(net)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    # Stage 3: the dense reference pipeline.
    with tf.device("/gpu:0"):
        net = nn_ops.conv3d(d1, d2, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
        net = nn_ops.conv3d(net, d3, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
        net = nn_ops.conv3d(net, d4, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
    with tf.Session(config=config) as sess:
        # The first run produces the result and is not timed.
        expected = sess.run(net)
        t11 = time.time()
        for _ in range(num_trials):
            sess.run(net)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])

    # Stage 4: entry-wise comparison with an absolute tolerance of 1e-3.
    approx_cmp = expected.flatten()
    approx = value3.flatten()
    print("entry count: ", len(approx_cmp))
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i, reference in enumerate(approx_cmp):
        if abs(reference - approx[i]) > 1e-3:
            if error_cnt == 0:
                first_error = i
            error_cnt += 1
        elif approx[i] != 0:
            correct_cnt += 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("sparse input size: ", len(t1ind))
    if error_cnt:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    print("OK")
    return 0
示例#22
0
def convolve_inputs(inputs, batch_size, height, width, channels, filters):
    """Apply a 1x1x1 ``conv3d`` plus bias to ``inputs`` and flatten the result.

    Two variables are requested through ``get_variable``: ``'Weights'`` with
    shape ``[1, 1, 1, channels, filters]`` and ``'Biases'`` with shape
    ``[filters]``, the latter initialised to 0.0.

    Args:
      inputs: Tensor handed to ``conv3d`` as its input.
      batch_size: Leading dimension of the reshaped result.
      height: Multiplied with ``width`` and ``filters`` to form the last
        dimension of the reshaped result.
      width: See ``height``.
      channels: Fourth dimension of the weight shape.
      filters: Last dimension of the weight shape and size of the bias.

    Returns:
      The biased convolution reshaped to
      ``[batch_size, -1, height * width * filters]``.
    """
    kernel_shape = [1, 1, 1, channels, filters]
    unit_strides = [1, 1, 1, 1, 1]

    kernel = get_variable('Weights', kernel_shape)
    bias = get_variable('Biases', [filters],
                        initializer=constant_initializer(0.0))

    convolved = conv3d(inputs, kernel, unit_strides, 'SAME')
    flat_shape = [batch_size, -1, height * width * filters]
    return reshape(convolved + bias, flat_shape)
示例#23
0
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type='K-RELU',
                 test_type='',
                 dense=True):
    """Time the direct sparse convolution and, optionally, dense ``conv3d``.

    Random sparse data and filter tensors are created with
    ``sp.createRandomSparseTensor``, converted with the ``sc_module``
    conversion ops and run through ``sc_module.direct_sparse_conv_kd``. When
    ``dense`` is true the densified tensors are also run through
    ``nn_ops.conv3d`` on the same device. Timings are printed as measured.

    Args:
      tensor_in_sizes: Shape of the input tensor. A copy with its last entry
        replaced by ``filter_in_sizes[-1]`` is used to size the output.
      filter_in_sizes: Shape of the filter tensor.
      stride: One stride applied to the three inner dimensions, or an
        iterable of per-dimension strides.
      rho_data: First argument of ``sp.createRandomSparseTensor`` for the
        data tensor (presumably its density -- confirm against ``sp``).
      rho_filter: Same, for the filter tensor.
      padding: Padding mode forwarded to both convolutions.
      dim: Forwarded unchanged to ``direct_sparse_conv_kd``.
      max_density: Scales the output entry count and is forwarded to
        ``direct_sparse_conv_kd``.
      num_trials: Number of timed runs per convolution.
      filter_type: Forwarded unchanged to ``direct_sparse_conv_kd``.
      test_type: Unused; kept for interface compatibility.
      dense: Whether to run and time the dense reference convolution.

    Returns:
      ``[expected, sv3, ts, td]`` when ``dense`` is true, where ``expected``
      is the dense result, ``sv3`` the sparse op result, and ``ts`` / ``td``
      the mean seconds per timed sparse / dense run. ``None`` otherwise.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so this function does not depend
    # on a particular module-level import.
    from collections.abc import Iterable

    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    # Entry count handed to the sparse op: the full output volume scaled by
    # max_density.
    out_sizes = np.copy(tensor_in_sizes)
    out_sizes[-1] = filter_in_sizes[-1]
    out_entry_count = np.prod(out_sizes) * max_density

    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Stage 1: convert data and filter into the layout consumed by the
    # sparse convolution op.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)

    tf.reset_default_graph()

    # Stage 2: sparse convolution. The first run is not timed; its result is
    # the one returned to the caller.
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
            out_entry_count, dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(approx_scskconv)
        start = time.time()
        for _ in range(num_trials):
            sess.run(approx_scskconv)
        stop = time.time()
        # max(..., 1) keeps the division defined when num_trials is 0.
        ts = abs(stop - start) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)

    if not dense:
        return None

    # Stage 3: dense reference convolution, timed the same way.
    with tf.device("/gpu:0"):
        conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
        expected = sess.run(conv)
        start = time.time()
        for _ in range(num_trials):
            sess.run(conv)
        stop = time.time()
        td = abs(stop - start) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)
    return [expected, sv3, ts, td]
示例#24
0
  def _ConstructAndTestGradientForConfig(
      self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
      padding, test_input, data_format, use_gpu):
    """Check the conv3d gradient for one configuration.

    Builds a conv3d over deterministic input/filter constants and, for each
    dtype returned by ``self._DtypesToTest``, asserts that the gradient error
    is below a dtype-specific tolerance.

    Args:
      batch: Batch size of the input.
      input_shape: ``(planes, rows, cols)`` of the input.
      filter_shape: ``(planes, rows, cols)`` of the filter.
      in_depth: Last dimension of the input and fourth of the filter.
      out_depth: Last dimension of the filter and of the expected output.
      stride: One stride used for all three spatial dimensions, or an
        iterable of per-dimension strides.
      padding: ``"VALID"`` selects the ceil((in - filter + 1) / stride)
        output size; any other value selects ceil(in / stride).
      test_input: If true the gradient is checked with respect to the input,
        otherwise with respect to the filter.
      data_format: ``"NCDHW"`` converts the input and strides before the op
        and converts the result back; other values are passed through.
      use_gpu: Forwarded to ``self._DtypesToTest`` and
        ``self.cached_session``.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so this method does not depend on
    # a particular module-level import.
    from collections.abc import Iterable

    input_planes, input_rows, input_cols = input_shape
    filter_planes, filter_rows, filter_cols = filter_shape

    input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
    filter_shape = [
        filter_planes, filter_rows, filter_cols, in_depth, out_depth
    ]

    if isinstance(stride, Iterable):
      strides = [1] + list(stride) + [1]
    else:
      strides = [1, stride, stride, stride, 1]

    if padding == "VALID":
      output_planes = int(
          math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
      output_rows = int(
          math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
      output_cols = int(
          math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
    else:
      output_planes = int(math.ceil(float(input_planes) / strides[1]))
      output_rows = int(math.ceil(float(input_rows) / strides[2]))
      output_cols = int(math.ceil(float(input_cols) / strides[3]))
    output_shape = [batch, output_planes, output_rows, output_cols, out_depth]

    input_size = 1
    for x in input_shape:
      input_size *= x
    filter_size = 1
    for x in filter_shape:
      filter_size *= x
    # Deterministic ramps in [0, 1) used as input and filter values.
    input_data = [x * 1.0 / input_size for x in range(0, input_size)]
    filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

    for data_type in self._DtypesToTest(use_gpu=use_gpu):
      # TODO(mjanusz): Modify gradient_checker to also provide max relative
      # error and synchronize the tolerance levels between the tests for forward
      # and backward computations.
      if data_type == dtypes.float64:
        tolerance = 1e-8
      elif data_type == dtypes.float32:
        tolerance = 5e-3
      elif data_type == dtypes.float16:
        tolerance = 1e-3

      with self.cached_session(use_gpu=use_gpu):
        orig_input_tensor = constant_op.constant(
            input_data, shape=input_shape, dtype=data_type, name="input")
        filter_tensor = constant_op.constant(
            filter_data, shape=filter_shape, dtype=data_type, name="filter")

        if data_format == "NCDHW":
          input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
          new_strides = test_util.NHWCToNCHW(strides)
        else:
          input_tensor = orig_input_tensor
          new_strides = strides

        conv = nn_ops.conv3d(
            input_tensor,
            filter_tensor,
            new_strides,
            padding,
            data_format=data_format,
            name="conv")

        if data_format == "NCDHW":
          conv = test_util.NCHWToNHWC(conv)

        self.assertEqual(conv.shape, tensor_shape.TensorShape(output_shape))

        if test_input:
          jacob_t, jacob_n = gradient_checker.compute_gradient(
              orig_input_tensor, input_shape, conv, output_shape)
        else:
          jacob_t, jacob_n = gradient_checker.compute_gradient(
              filter_tensor, filter_shape, conv, output_shape)

        if data_type != dtypes.float16:
          # Remembered for a later fp16 iteration; this relies on
          # _DtypesToTest yielding a non-fp16 dtype before float16.
          reference_jacob_t = jacob_t
          err = np.fabs(jacob_t - jacob_n).max()
        else:
          # Compare fp16 theoretical gradients to the theoretical gradients
          # of the most recent non-fp16 dtype, since fp16 numerical gradients
          # are too imprecise.
          err = np.fabs(jacob_t - reference_jacob_t).max()

      print("conv3d gradient error = ", err)
      self.assertLess(err, tolerance)
示例#25
0
    def _RunAndVerifyBackprop(self, input_sizes, filter_sizes, output_sizes,
                              strides, dilations, padding, data_format,
                              use_gpu, err, mode):
        """Compare a conv3d gradient run via ``with_ngraph`` to a reference.

        The gradient under test comes from ``nn_ops.conv3d`` built on
        placeholders and executed through ``self.with_ngraph``. The reference
        gradient comes from ``nn_ops.convolution`` built on constants holding
        the same values and evaluated with ``self.evaluate``.

        Args:
          input_sizes: Shape of the input tensor.
          filter_sizes: Shape of the filter tensor.
          output_sizes: Unused; kept for interface compatibility.
          strides: List of inner strides; ``1`` is added at both ends for
            ``conv3d``.
          dilations: List of inner dilation rates; ``1`` is added at both
            ends for ``conv3d``.
          padding: Padding mode forwarded to both ops.
          data_format: Data format forwarded to both ops.
          use_gpu: Forwarded to ``self.cached_session``.
          err: Tolerance handed to ``assertArrayNear``.
          mode: ``"input"`` differentiates with respect to the input; any
            other value differentiates with respect to the filter.
        """
        total_input_size = 1
        total_filter_size = 1
        for s in input_sizes:
            total_input_size *= s
        for s in filter_sizes:
            total_filter_size *= s
        # Initializes the tensors with incrementing numbers starting from 1.
        x1 = [f * 1.0 for f in range(1, total_input_size + 1)]
        x2 = [f * 1.0 for f in range(1, total_filter_size + 1)]

        # NOTE: no dilation or device guard is applied here; every
        # configuration passed in is executed.
        with self.cached_session(use_gpu=use_gpu):

            t1_ph = tf.compat.v1.placeholder(np.float32, shape=input_sizes)
            t1 = constant_op.constant(x1, shape=input_sizes)
            t2_ph = tf.compat.v1.placeholder(np.float32, shape=filter_sizes)
            t2 = constant_op.constant(x2, shape=filter_sizes)
            full_strides = [1] + strides + [1]
            full_dilations = [1] + dilations + [1]

            actual = nn_ops.conv3d(t1_ph,
                                   t2_ph,
                                   strides=full_strides,
                                   dilations=full_dilations,
                                   padding=padding,
                                   data_format=data_format)

            expected = nn_ops.convolution(t1,
                                          t2,
                                          padding=padding,
                                          strides=strides,
                                          dilation_rate=dilations,
                                          data_format=data_format)

            actual_grad = gradients_impl.gradients(
                actual, t1_ph if mode == "input" else t2_ph)[0]
            expected_grad = gradients_impl.gradients(
                expected, t1 if mode == "input" else t2)[0]

            expected_value = self.evaluate(expected_grad)

            def run_actual_grad(session):
                # The constants are evaluated at call time to produce the
                # values fed into the placeholders.
                return session.run(actual_grad,
                                   feed_dict={
                                       t1_ph: t1.eval(),
                                       t2_ph: t2.eval()
                                   })

            actual_value = self.with_ngraph(run_actual_grad)

            self.assertShapeEqual(actual_value, actual_grad)
            self.assertShapeEqual(expected_value, expected_grad)

        print("expected = ", expected_value)
        print("actual = ", actual_value)

        self.assertArrayNear(expected_value.flatten(), actual_value.flatten(),
                             err)
示例#26
0
    def _ConstructAndTestGradientForConfig(self, batch, input_shape,
                                           filter_shape, in_depth, out_depth,
                                           stride, padding, test_input,
                                           data_format, use_gpu):
        """Assert that conv3d's gradient error stays under a per-dtype bound.

        For every dtype from ``self._DtypesToTest`` a conv3d is built over
        deterministic input/filter constants, its static output shape is
        checked, and ``gradient_checker.compute_gradient`` is used to obtain
        the error compared against the tolerance.

        Args:
          batch: Batch size of the input.
          input_shape: ``(planes, rows, cols)`` of the input.
          filter_shape: ``(planes, rows, cols)`` of the filter.
          in_depth: Last dimension of the input and fourth of the filter.
          out_depth: Last dimension of the filter and of the expected output.
          stride: One stride for all three spatial dimensions, or an iterable
            of per-dimension strides.
          padding: ``"VALID"`` selects the ceil((in - filter + 1) / stride)
            output size; any other value selects ceil(in / stride).
          test_input: If true the gradient is taken with respect to the
            input, otherwise with respect to the filter.
          data_format: ``"NCDHW"`` converts the input and strides before the
            op and the result back afterwards.
          use_gpu: Forwarded to ``self._DtypesToTest`` and
            ``self.cached_session``.
        """
        in_planes, in_rows, in_cols = input_shape
        k_planes, k_rows, k_cols = filter_shape

        full_input_shape = [batch, in_planes, in_rows, in_cols, in_depth]
        full_filter_shape = [k_planes, k_rows, k_cols, in_depth, out_depth]

        strides = ([1] + list(stride) + [1]
                   if isinstance(stride, collections_abc.Iterable) else
                   [1, stride, stride, stride, 1])
        steps = (strides[1], strides[2], strides[3])
        in_dims = (in_planes, in_rows, in_cols)

        if padding == "VALID":
            k_dims = (k_planes, k_rows, k_cols)
            spatial_out = [
                int(math.ceil((size - window + 1.0) / step))
                for size, window, step in zip(in_dims, k_dims, steps)
            ]
        else:
            spatial_out = [
                int(math.ceil(float(size) / step))
                for size, step in zip(in_dims, steps)
            ]
        output_shape = [batch] + spatial_out + [out_depth]

        # Deterministic ramps in [0, 1) serve as input and filter values.
        input_size = batch * in_planes * in_rows * in_cols * in_depth
        filter_size = k_planes * k_rows * k_cols * in_depth * out_depth
        input_data = [idx * 1.0 / input_size for idx in range(input_size)]
        filter_data = [idx * 1.0 / filter_size for idx in range(filter_size)]

        channels_first = data_format == "NCDHW"

        for data_type in self._DtypesToTest(use_gpu=use_gpu):
            # TODO(mjanusz): Modify gradient_checker to also provide max
            # relative error and synchronize the tolerance levels between the
            # tests for forward and backward computations.
            if data_type == dtypes.float64:
                tolerance = 1e-8
            elif data_type == dtypes.float32:
                tolerance = 5e-3
            elif data_type == dtypes.float16:
                tolerance = 1e-3

            with self.cached_session(use_gpu=use_gpu):
                orig_input_tensor = constant_op.constant(
                    input_data,
                    shape=full_input_shape,
                    dtype=data_type,
                    name="input")
                filter_tensor = constant_op.constant(
                    filter_data,
                    shape=full_filter_shape,
                    dtype=data_type,
                    name="filter")

                input_tensor = orig_input_tensor
                new_strides = strides
                if channels_first:
                    input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
                    new_strides = test_util.NHWCToNCHW(strides)

                conv = nn_ops.conv3d(
                    input_tensor,
                    filter_tensor,
                    new_strides,
                    padding,
                    data_format=data_format,
                    name="conv")
                if channels_first:
                    conv = test_util.NCHWToNHWC(conv)

                self.assertEqual(conv.shape,
                                 tensor_shape.TensorShape(output_shape))

                # Differentiate with respect to either the input or filter.
                if test_input:
                    wrt, wrt_shape = orig_input_tensor, full_input_shape
                else:
                    wrt, wrt_shape = filter_tensor, full_filter_shape
                jacob_t, jacob_n = gradient_checker.compute_gradient(
                    wrt, wrt_shape, conv, output_shape)

                if data_type == dtypes.float16:
                    # fp16 numerical gradients are too imprecise, so the
                    # fp16 theoretical gradient is compared with the
                    # theoretical gradient kept from an earlier dtype.
                    err = np.fabs(jacob_t - reference_jacob_t).max()
                else:
                    reference_jacob_t = jacob_t
                    err = np.fabs(jacob_t - jacob_n).max()

            print("conv3d gradient error = ", err)
            self.assertLess(err, tolerance)
示例#27
0
    def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding,
                      expected):
        """Run conv3d through ``with_ngraph`` and compare with ``expected``.

        For every ``(data_format, use_gpu)`` pair from ``GetTestConfigs`` and
        every dtype from ``self._DtypesToTest``, a conv3d is built on
        placeholders, fed deterministic values and executed through
        ``self.with_ngraph``. The flattened result must be close to
        ``expected``.

        Args:
          tensor_in_sizes: Shape of the input tensor.
          filter_in_sizes: Shape of the filter tensor.
          stride: One stride for the three inner dimensions, or an iterable
            of per-dimension strides.
          padding: Padding mode forwarded to ``conv3d``.
          expected: Flat sequence of expected output values.
        """
        total_size_tensor = np.prod(tensor_in_sizes)
        total_size_filter = np.prod(filter_in_sizes)

        # Initializes the tensors with numbers in (0, 1]. The values are kept
        # fairly small to avoid overflowing float16 during the conv3d.
        x1 = [
            f * 1.0 / total_size_tensor
            for f in range(1, total_size_tensor + 1)
        ]
        x2 = [
            f * 1.0 / total_size_filter
            for f in range(1, total_size_filter + 1)
        ]

        if isinstance(stride, collections_abc.Iterable):
            base_strides = [1] + list(stride) + [1]
        else:
            base_strides = [1, stride, stride, stride, 1]

        for data_format, use_gpu in GetTestConfigs():
            for dtype in self._DtypesToTest(use_gpu):
                with self.cached_session(use_gpu=use_gpu):
                    t1 = constant_op.constant(x1,
                                              shape=tensor_in_sizes,
                                              dtype=dtype)
                    t2 = constant_op.constant(x2,
                                              shape=filter_in_sizes,
                                              dtype=dtype)

                    strides = base_strides
                    if data_format == "NCDHW":
                        t1 = test_util.NHWCToNCHW(t1)
                        strides = test_util.NHWCToNCHW(base_strides)

                    # The input placeholder takes its shape from the
                    # (possibly converted) constant, so the value fed into it
                    # and the layout conv3d is told to expect agree.
                    t1_ph = tf.compat.v1.placeholder(dtype, shape=t1.shape)
                    t2_ph = tf.compat.v1.placeholder(dtype,
                                                     shape=filter_in_sizes)

                    conv = nn_ops.conv3d(t1_ph,
                                         t2_ph,
                                         strides,
                                         padding=padding,
                                         data_format=data_format)
                    if data_format == "NCDHW":
                        conv = test_util.NCHWToNHWC(conv)

                    def run_conv(session):
                        # Called immediately below, so it always sees the
                        # tensors of the current loop iteration.
                        return session.run(conv,
                                           feed_dict={
                                               t1_ph: t1.eval(),
                                               t2_ph: t2.eval()
                                           })

                    value = self.with_ngraph(run_conv)
                    print("expected = ", expected)
                    print("actual = ", value)
                    # float16 results get a looser tolerance.
                    tol = 1e-3 if value.dtype == np.float16 else 1e-6

                    self.assertAllClose(expected,
                                        value.flatten(),
                                        atol=tol,
                                        rtol=tol)
    def ConstructAndTestGradient(self, batch, input_planes, input_rows,
                                 input_cols, filter_planes, filter_rows,
                                 filter_cols, in_depth, out_depth, stride,
                                 padding, test_input):
        """Check the conv3d gradient error for one configuration.

        Builds a conv3d over deterministic input/filter constants and asserts
        that ``gradient_checker.compute_gradient_error`` is below a tolerance
        chosen from GPU availability.

        Args:
          batch: Batch size of the input.
          input_planes: Input size along the first spatial dimension.
          input_rows: Input size along the second spatial dimension.
          input_cols: Input size along the third spatial dimension.
          filter_planes: Filter size along the first spatial dimension.
          filter_rows: Filter size along the second spatial dimension.
          filter_cols: Filter size along the third spatial dimension.
          in_depth: Last dimension of the input and fourth of the filter.
          out_depth: Last dimension of the filter and of the output.
          stride: One stride for all three spatial dimensions, or an iterable
            of per-dimension strides.
          padding: ``"VALID"`` selects the ceil((in - filter + 1) / stride)
            output size; any other value selects ceil(in / stride).
          test_input: If true the gradient is checked with respect to the
            input, otherwise with respect to the filter.
        """
        # ``collections.Iterable`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``. Imported locally so this method does not
        # depend on a particular module-level import.
        from collections.abc import Iterable

        input_shape = [batch, input_planes, input_rows, input_cols, in_depth]
        filter_shape = [
            filter_planes, filter_rows, filter_cols, in_depth, out_depth
        ]

        if isinstance(stride, Iterable):
            strides = [1] + list(stride) + [1]
        else:
            strides = [1, stride, stride, stride, 1]

        if padding == "VALID":
            output_planes = int(
                math.ceil((input_planes - filter_planes + 1.0) / strides[1]))
            output_rows = int(
                math.ceil((input_rows - filter_rows + 1.0) / strides[2]))
            output_cols = int(
                math.ceil((input_cols - filter_cols + 1.0) / strides[3]))
        else:
            output_planes = int(math.ceil(float(input_planes) / strides[1]))
            output_rows = int(math.ceil(float(input_rows) / strides[2]))
            output_cols = int(math.ceil(float(input_cols) / strides[3]))
        output_shape = [
            batch, output_planes, output_rows, output_cols, out_depth
        ]

        input_size = 1
        for x in input_shape:
            input_size *= x
        filter_size = 1
        for x in filter_shape:
            filter_size *= x
        # Deterministic ramps in [0, 1) used as input and filter values.
        input_data = [x * 1.0 / input_size for x in range(0, input_size)]
        filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]

        # float32 with a loose tolerance when a GPU is available, float64
        # with a tight tolerance otherwise.
        if test.is_gpu_available():
            data_type = dtypes.float32
            tolerance = 4e-3
        else:
            data_type = dtypes.float64
            tolerance = 1e-8

        with self.test_session(use_gpu=True):
            input_tensor = constant_op.constant(input_data,
                                                shape=input_shape,
                                                dtype=data_type,
                                                name="input")
            filter_tensor = constant_op.constant(filter_data,
                                                 shape=filter_shape,
                                                 dtype=data_type,
                                                 name="filter")
            conv = nn_ops.conv3d(input_tensor,
                                 filter_tensor,
                                 strides,
                                 padding,
                                 name="conv")

            if test_input:
                err = gradient_checker.compute_gradient_error(
                    input_tensor, input_shape, conv, output_shape)
            else:
                err = gradient_checker.compute_gradient_error(
                    filter_tensor, filter_shape, conv, output_shape)
        print("conv3d gradient error = ", err)
        self.assertLess(err, tolerance)
示例#29
0
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type="K-RELU",
                 test_type=""):
    """Time the direct sparse convolution against dense ``conv3d``.

    Random sparse data and filter tensors are created with
    ``sp.createRandomSparseTensor``, converted with the ``sc_module``
    conversion ops and run through ``sc_module.direct_sparse_conv_kd``. The
    densified tensors are then run through ``nn_ops.conv3d``. Both timings
    and their ratio are printed.

    Args:
      tensor_in_sizes: Shape of the input tensor. A copy with its last entry
        replaced by ``filter_in_sizes[-1]`` is used to size the output.
      filter_in_sizes: Shape of the filter tensor.
      stride: One stride applied to the three inner dimensions, or an
        iterable of per-dimension strides.
      rho_data: First argument of ``sp.createRandomSparseTensor`` for the
        data tensor (presumably its density -- confirm against ``sp``).
      rho_filter: Same, for the filter tensor.
      padding: Padding mode forwarded to both convolutions.
      dim: Forwarded unchanged to ``direct_sparse_conv_kd``.
      max_density: Scales the output entry count and is forwarded to
        ``direct_sparse_conv_kd``.
      num_trials: Number of timed runs per convolution.
      filter_type: Forwarded unchanged to ``direct_sparse_conv_kd``.
      test_type: Unused; kept for interface compatibility.

    Returns:
      ``None``. The function returns right after printing the time ratio;
      the backprop benchmark and value check that follow are unreachable.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so this function does not depend
    # on a particular module-level import.
    from collections.abc import Iterable

    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    # Entry count handed to the sparse op: the full output volume scaled by
    # max_density.
    out_sizes = np.copy(tensor_in_sizes)
    out_sizes[-1] = filter_in_sizes[-1]
    out_entry_count = np.prod(out_sizes) * max_density
    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Stage 1: convert data and filter into the layout consumed by the
    # sparse convolution op.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)

    tf.reset_default_graph()

    # Stage 2: sparse convolution. The first run is not timed.
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
            out_entry_count, dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(approx_scskconv)
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(approx_scskconv)
        t6 = time.time()
        # max(..., 1) keeps the division defined when num_trials is 0.
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)

    # Stage 3: dense reference convolution, timed the same way.
    with tf.device("/gpu:0"):
        conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
        expected = sess.run(conv)
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(conv)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)
    # NOTE(review): this unconditional return makes everything below
    # unreachable, so the function always returns None and the backprop
    # benchmark / value check never runs. The tail is kept verbatim so it can
    # be re-enabled deliberately; confirm with the author whether it should
    # be restored or deleted.
    return

    [bp_ind, sv3_bp_val,
     bp_sh] = sp.createRandomSparseTensor(1, [len(sv3.out_values)], 1, 9)
    d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                               sv3.out_block_channel_mapping[-1])
    out_backprop_val = constant_op.constant(d3_)

    t_bp1 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_filter_v2(d1, filter_in_sizes,
                                                   out_backprop_val, strides,
                                                   padding)
        res_bp1 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp1 = t_bp1 + t2 - t1
    t_bp1 = t_bp1 / float(num_trials)
    print("time bp1: ", t_bp1)

    t_bp2 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_input_v2(tensor_in_sizes, d2,
                                                  out_backprop_val, strides,
                                                  padding)
        res_bp2 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp2 = t_bp2 + t2 - t1
    t_bp2 = t_bp2 / float(num_trials)
    print("time bp2: ", t_bp2)

    t_bp3 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_conv_kd_backprop(
                pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
                pf.out_shape, pf.out_channel_mapping, sv3.out_indices,
                sv3.out_values, sv3.out_shape, sv3.out_block_channel_mapping,
                sv3_bp_val, strides, padding, dim, max_density)
        res_bp3 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp3 = t_bp3 + t2 - t1
    t_bp3 = t_bp3 / float(num_trials)
    print("time bp3: ", t_bp3)
    print("sparse ratio: ", t_bp3 / (t_bp2 + t_bp1))

    bp_sfg = sp.sparse1d_to_dense(pf.out_indices, res_bp3.filter_grads,
                                  pf.out_shape, pf.out_channel_mapping[-1])
    bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3.input_grads,
                                  pd.out_shape,
                                  pd.out_block_channel_mapping[-1])
    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])
    print("expected", expected)
    print("sv3", value3)
    print("out densities", sv3.out_channel_densities)

    has_error = False
    approx_cmp = expected.flatten()
    approx = value3.flatten()
    non_zero_count = 0
    for i in range(len(approx_cmp)):
        non_zero_count = non_zero_count + 1
    print("entry count: ", non_zero_count)
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i in range(len(approx_cmp)):
        if approx_cmp[i] > 0 and abs(approx_cmp[i] - approx[i]) > 1e-3:
            if has_error == False:
                first_error = i
            has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            correct_cnt = correct_cnt + 1

    bp_sig_flat = bp_sig.flatten()
    res_bp2_flat = res_bp2.flatten()
    bp_i_error_cnt = 0
    bp_i_correct_cnt = 0
    for i in range(len(bp_sig_flat)):
        if bp_sig_flat[i] != 0:
            if bp_sig_flat[i] == res_bp2_flat[i]:
                bp_i_correct_cnt = bp_i_correct_cnt + 1
            else:
                bp_i_error_cnt = bp_i_error_cnt + 1

    filter_flat = d2.flatten()
    bp_sfg_flat = bp_sfg.flatten()
    res_bp1_flat = res_bp1.flatten()
    bp_f_error_cnt = 0
    bp_f_correct_cnt = 0
    for i in range(len(filter_flat)):
        if filter_flat[i] != 0:
            if bp_sfg_flat[i] == res_bp1_flat[i]:
                bp_f_correct_cnt = bp_f_correct_cnt + 1
            else:
                bp_f_error_cnt = bp_f_error_cnt + 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("sparse input size: ", len(t1ind))
    print("total number of bpi corrects: ", bp_i_correct_cnt)
    print("sparse filter size: ", len(t2ind))
    print("total number of bpf corrects: ", bp_f_correct_cnt)
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    if bp_i_error_cnt > 0:
        print("total number of  bpi errors: ", bp_i_error_cnt)
    if bp_f_error_cnt > 0:
        print("total number of  bpf errors: ", bp_f_error_cnt)
    print("OK")
    return 0