Example No. 1
  def _kernel_constraint(self, kernel):
    """Radially constraints a kernel with shape (height, width, channels)."""
    padding = K.constant([[1, 1], [1, 1]], dtype='int32')

    kernel_shape = K.shape(kernel)[0]
    start = K.cast(kernel_shape / 2, 'int32')

    kernel_new = K.switch(
        K.cast(math_ops.floormod(kernel_shape, 2), 'bool'),
        lambda: kernel[start - 1:start, start - 1:start],
        lambda: kernel[start - 1:start, start - 1:start] + K.zeros(  # pylint: disable=g-long-lambda
            (2, 2), dtype=kernel.dtype))
    index = K.switch(
        K.cast(math_ops.floormod(kernel_shape, 2), 'bool'),
        lambda: K.constant(0, dtype='int32'),
        lambda: K.constant(1, dtype='int32'))
    while_condition = lambda index, *args: K.less(index, start)

    def body_fn(i, array):
      return i + 1, array_ops.pad(
          array,
          padding,
          constant_values=kernel[start + i, start + i])

    _, kernel_new = control_flow_ops.while_loop(
        while_condition,
        body_fn,
        [index, kernel_new],
        shape_invariants=[index.get_shape(),
                          tensor_shape.TensorShape([None, None])])
    return kernel_new
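
The K.switch predicate above turns on the parity of the kernel size. A minimal standalone sketch of that idiom using the public tf.math.floormod (the helper name is hypothetical):

import tensorflow as tf

def is_odd(size):
  # floormod(size, 2) is 1 for odd sizes and 0 for even ones; casting the
  # remainder to bool yields the kind of predicate fed to K.switch above.
  return tf.cast(tf.math.floormod(size, 2), tf.bool)

print(is_odd(tf.constant(5)))  # True: a 5x5 kernel has a single center cell
print(is_odd(tf.constant(4)))  # False: a 4x4 kernel has a 2x2 center block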
Example No. 2
 def testFloorModBfloat16(self):
   nums, divs = self.floatTestData()
   tf_result = math_ops.floormod(
       math_ops.cast(nums, dtypes.bfloat16),
       math_ops.cast(divs, dtypes.bfloat16))
   np_result = nums % divs
   self.assertAllEqual(tf_result, np_result)
Example No. 3
 def testConsistent(self):
   nums, divs = self.intTestData()
   with self.test_session():
     tf_result = (
         math_ops.floor_div(nums, divs) * divs + math_ops.floormod(nums, divs)
     ).eval()
     tf_nums = array_ops.constant(nums)
     tf_divs = array_ops.constant(divs)
     tf2_result = (tf_nums // tf_divs * tf_divs + tf_nums % tf_divs).eval()
     np_result = (nums // divs) * divs + (nums % divs)
      # Consistency with numpy
     self.assertAllEqual(tf_result, np_result)
      # Consistency with two forms of divide
     self.assertAllEqual(tf_result, tf2_result)
      # Consistency for truncation form
     tf3_result = (
         math_ops.truncatediv(nums, divs) * divs
         + math_ops.truncatemod(nums, divs)
     ).eval()
     expanded_nums = np.reshape(np.tile(nums, divs.shape[1]),
                                (nums.shape[0], divs.shape[1]))
     # Consistent with desire to get numerator
     self.assertAllEqual(tf3_result, expanded_nums)
     # Consistent with desire to get numerator
     self.assertAllEqual(tf_result, expanded_nums)
Example No. 4
 def testFloorModInt(self):
     nums, divs = self.intTestData()
     # TODO(aselle): Change test to use % after switch
     # tf_result = math_ops.floor_mod(nums, divs)
     tf_result = math_ops.floormod(nums, divs)
     np_result = nums % divs
     self.assertAllEqual(tf_result, np_result)
Example No. 5
 def testFloorModInt(self):
     nums, divs = self.intTestData()
     with self.test_session():
         # TODO (aselle): Change test to use % after switch id:3425 gh:3426
         # tf_result = math_ops.floor_mod(nums, divs).eval()
         tf_result = math_ops.floormod(nums, divs).eval()
         np_result = nums % divs
         self.assertAllEqual(tf_result, np_result)
Example No. 6
 def testFloorModInt(self):
   nums, divs = self.intTestData()
   with self.test_session():
     # TODO(aselle): Change test to use % after switch
     # tf_result = math_ops.floor_mod(nums, divs).eval()
     tf_result = math_ops.floormod(nums, divs).eval()
     np_result = nums % divs
     self.assertAllEqual(tf_result, np_result)
Example No. 7
 def testFloorModFloat(self):
   nums, divs = self.floatTestData()
   for dtype in [np.float16, np.float32, np.float64]:
     x = nums.astype(dtype)
     y = divs.astype(dtype)
     tf_result = math_ops.floormod(x, y)
     np_result = x % y
     self.assertAllEqual(tf_result, np_result)
     tf2_result = (array_ops.constant(x) % array_ops.constant(y))
     self.assertAllEqual(tf2_result, tf_result)
Example No. 8
 def testFloorModGradient(self):
   # Making sure the input is not near the discontinuity point where
   # x/y == floor(x/y)
   ns = constant_op.constant([17.], dtype=dtypes.float32)
   inputs = constant_op.constant([131.], dtype=dtypes.float32)
   floor_mod = math_ops.floormod(inputs, ns)
   with self.cached_session():
     error = gradient_checker.compute_gradient_error(inputs, [1],
                                                     floor_mod, [1])
     self.assertLess(error, 1e-4)
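
This test relies on floormod being differentiable away from the points where x/y is an integer: since floormod(x, y) = x - floor(x/y) * y and floor has zero gradient between its jumps, the derivative with respect to x is 1 there. A minimal eager-mode sketch of the same check, reusing the values from the test above:

import tensorflow as tf

x = tf.constant([131.0])
y = tf.constant([17.0])
with tf.GradientTape() as tape:
  tape.watch(x)
  z = tf.math.floormod(x, y)  # 131 mod 17 = 12, away from any discontinuity
print(tape.gradient(z, x))  # [1.0], i.e. d/dx (x - floor(x/y) * y) = 1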
Example No. 9
 def testFloorDivModIntEdges(self):
   for dtype in [np.int32, np.int64]:
     x, y = self.intEdgeTestData(dtype)
     tf_floor_div = math_ops.floor_div(x, y)
     np_floor_div = self.numpySafeFloorDivInt(x, y)
     self.assertAllEqual(tf_floor_div, np_floor_div)
     tf_floor_mod = math_ops.floormod(x, y)
     np_floor_mod = self.numpySafeFloorModInt(x, y)
     self.assertAllEqual(tf_floor_mod, np_floor_mod)
     z = math_ops.add(math_ops.multiply(tf_floor_div, y), tf_floor_mod)
     # x = floor_div(x, y) * y + floor_mod(x, y)
     self.assertAllEqual(z, np.broadcast_to(x, z.shape))
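
The identity asserted here is what distinguishes floor division from truncating division: with negative operands the floor-style quotient rounds toward negative infinity and the remainder takes the sign of the divisor, yet floor_div(x, y) * y + floormod(x, y) still reconstructs x exactly. A small sketch with the public API:

import tensorflow as tf

x = tf.constant([-7, 7])
y = tf.constant([3, -3])
print(tf.math.floordiv(x, y))  # [-3, -3]: quotients round toward -inf
print(tf.math.floormod(x, y))  # [ 2, -2]: remainders take the divisor's sign
# x == floordiv(x, y) * y + floormod(x, y) holds for every sign combination:
print(tf.math.floordiv(x, y) * y + tf.math.floormod(x, y))  # [-7, 7]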
Example No. 10
    def _replace_oov_buckets(self, inputs, lookups):
        """Replace the default OOV value with one of the OOV bucket values."""
        if self.oov_tokens is None:
            return lookups

        num_oov_elements = self.oov_tokens.shape.num_elements()
        if inputs.dtype.is_integer:
            oov_indices = math_ops.floormod(inputs, num_oov_elements)
        else:
            oov_indices = string_ops.string_to_hash_bucket_fast(
                inputs, num_buckets=num_oov_elements)

        oov_values = array_ops.gather(self.oov_tokens, oov_indices)
        oov_locations = math_ops.equal(lookups, self.table._default_value)  # pylint: disable=protected-access

        return array_ops.where(oov_locations, oov_values, lookups)
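
Here floormod serves as a cheap deterministic hash: every integer input id is folded into one of num_oov_elements buckets, so an unknown id always maps to the same OOV token. A sketch of that folding in isolation (the token values are hypothetical):

import tensorflow as tf

oov_tokens = tf.constant([1, 2])  # two hypothetical OOV bucket token ids
num_oov = oov_tokens.shape.num_elements()

raw_ids = tf.constant([100, 101, 102, 103])
buckets = tf.math.floormod(raw_ids, num_oov)  # [0, 1, 0, 1]
print(tf.gather(oov_tokens, buckets))         # [1, 2, 1, 2]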
Example No. 11
 def testConsistent(self):
     nums, divs = self.intTestData()
     tf_result = (math_ops.floor_div(nums, divs) * divs +
                  math_ops.floormod(nums, divs))
     tf_nums = array_ops.constant(nums)
     tf_divs = array_ops.constant(divs)
     tf2_result = (tf_nums // tf_divs * tf_divs + tf_nums % tf_divs)
     np_result = (nums // divs) * divs + (nums % divs)
     # Consistent with numpy
     self.assertAllEqual(tf_result, np_result)
     # Consistent with two forms of divide
     self.assertAllEqual(tf_result, tf2_result)
      # Consistency for truncation form
     tf3_result = (math_ops.truncatediv(nums, divs) * divs +
                   math_ops.truncatemod(nums, divs))
     expanded_nums = np.reshape(np.tile(nums, divs.shape[1]),
                                (nums.shape[0], divs.shape[1]))
     # Consistent with desire to get numerator
     self.assertAllEqual(tf3_result, expanded_nums)
     # Consistent with desire to get numerator
     self.assertAllEqual(tf_result, expanded_nums)
Example No. 12
 def testFloorModFloat(self):
   nums, divs = self.floatTestData()
   with self.test_session():
     tf_result = math_ops.floormod(nums, divs).eval()
     np_result = nums % divs
     self.assertAllEqual(tf_result, np_result)
Example No. 13
  def _finish(self, state):
    var_dtype = self._variables[0].dtype.base_dtype
    # Update global step.
    global_step = self._get_global_step(state)
    update_global_step = state_ops.assign_add(global_step, 1.)

    # Update the first moment estimate.
    beta1 = state.get_hyper("beta1", dtype=var_dtype)
    moment1 = self._get_moment1(state)
    flat_grad = self._get_flat_grad(state)
    # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
    update_moment1 = moment1.assign(beta1 * moment1 + (1. - beta1) * flat_grad)

    # Update the gradient buffer.
    window = state.get_hyper("window")
    grad_buffer = self._get_grad_buffer(state)
    next_grad_index = math_ops.floormod(
        math_ops.to_int32(update_global_step - 1.), window)
    # grad_buffer[(t-1) % window] := moment1_t
    update_grad_buffer = state_ops.scatter_update(grad_buffer, next_grad_index,
                                                  update_moment1)

    # Compute the update step.
    eps = state.get_hyper("eps", dtype=var_dtype)
    svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
    sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
    lr = state.get_hyper("lr", dtype=var_dtype)
    denom = math_ops.sqrt(
        math_ops.minimum(
            ops.convert_to_tensor(update_global_step),
            ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
    moment1_2d = array_ops.expand_dims(update_moment1, -1)

    # m = grad_buffer^T / sqrt(min(t, window))
    # m has shape [model dimension, window], where model dimension is the sum
    # of the dimensions of the flattened variables.
    m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

    # sigma, u, _ = SVD(m^Tm + I * svd_eps)
    mm = math_ops.matmul(m, m, transpose_a=True)
    damping = math_ops.cast(linalg_ops.eye(window), dtype=var_dtype) * svd_eps
    sigma, u, _ = linalg_ops.svd(mm + damping)
    sigma_sqrt = math_ops.sqrt(sigma)
    sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

    # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
    # We add sigma_eps to alleviate numerical instability.
    # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
    sigma_sqrt_inv = math_ops.divide(
        math_ops.cast(1.0, dtype=var_dtype),
        math_ops.pow(sigma_sqrt + sigma_eps, 3))

    # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
    # inversion of a model dimension by model dimension matrix is needed. To
    # speed up this computation we calculate the following instead:
    # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
    new_step = array_ops.expand_dims(
        array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
    head = math_ops.matmul(
        m,
        math_ops.matmul(
            u,
            math_ops.matmul(
                array_ops.diag(sigma_sqrt_inv),
                math_ops.matmul(
                    u,
                    math_ops.matmul(m, moment1_2d, transpose_a=True),
                    transpose_a=True))))

    # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
    # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
    # Woodbury's identity.
    # For full derivation please see paper at
    # https://arxiv.org/pdf/1806.02958.pdf
    tail = moment1_2d - math_ops.matmul(
        m,
        math_ops.matmul(
            u,
            math_ops.matmul(
                array_ops.diag(
                    math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                    sigma)),
                math_ops.matmul(
                    u,
                    math_ops.matmul(m, moment1_2d, transpose_a=True),
                    transpose_a=True))))
    scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

    update_new_step = control_flow_ops.cond(
        sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
        lambda: math_ops.add(new_step, head))

    # Update each variable.
    update_step = []
    for var in self._variables:
      dim = self.shape_dict[var.name]
      start_index = self.index_dict[var.name]
      end_index = start_index + dim
      var_update_correct_shape = array_ops.reshape(
          update_new_step[start_index:end_index], var.get_shape())
      var_updated = state_ops.assign_sub(var, lr * var_update_correct_shape)
      update_step.append(var_updated)

    return control_flow_ops.group(update_step)
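
In this optimizer floormod implements ring-buffer indexing: the gradient buffer keeps only the last `window` first-moment estimates, and floormod of the zero-based step counter selects the slot to overwrite. A minimal sketch of that indexing pattern, assuming a window of 3 slots:

import tensorflow as tf

window = 3
for step in range(1, 7):  # hypothetical global steps 1..6
  slot = tf.math.floormod(step - 1, window)
  print(step, int(slot))  # slots cycle 0, 1, 2, 0, 1, 2, overwriting the oldest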
Example No. 14
    def _finish(self, state):
        var_dtype = self._variables[0].dtype.base_dtype
        # Update global step.
        global_step = self._get_global_step(state)
        update_global_step = state_ops.assign_add(global_step, 1.)

        # Update the first moment estimate.
        beta1 = state.get_hyper("beta1", dtype=var_dtype)
        moment1 = self._get_moment1(state)
        flat_grad = self._get_flat_grad(state)
        # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
        update_moment1 = moment1.assign(beta1 * moment1 +
                                        (1. - beta1) * flat_grad)

        # Update the gradient buffer.
        window = state.get_hyper("window")
        grad_buffer = self._get_grad_buffer(state)
        next_grad_index = math_ops.floormod(
            math_ops.to_int32(update_global_step - 1.), window)
        # grad_buffer[(t-1) % window] := moment1_t
        update_grad_buffer = state_ops.scatter_update(grad_buffer,
                                                      next_grad_index,
                                                      update_moment1)

        # Compute the update step.
        eps = state.get_hyper("eps", dtype=var_dtype)
        svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
        sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
        lr = state.get_hyper("lr", dtype=var_dtype)
        denom = math_ops.sqrt(
            math_ops.minimum(
                ops.convert_to_tensor(update_global_step),
                ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
        moment1_2d = array_ops.expand_dims(update_moment1, -1)

        # m = grad_buffer^T / sqrt(min(t, window))
        # m has shape [model dimension, window], where model dimension is the sum
        # of the dimensions of the flattened variables.
        m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

        # sigma, u, _ = SVD(m^Tm + I * svd_eps)
        mm = math_ops.matmul(m, m, transpose_a=True)
        damping = math_ops.cast(linalg_ops.eye(window),
                                dtype=var_dtype) * svd_eps
        sigma, u, _ = linalg_ops.svd(mm + damping)
        sigma_sqrt = math_ops.sqrt(sigma)
        sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

        # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
        # We add sigma_eps to alleviate numerical instability.
        # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
        sigma_sqrt_inv = math_ops.divide(
            math_ops.cast(1.0, dtype=var_dtype),
            math_ops.pow(sigma_sqrt + sigma_eps, 3))

        # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
        # inversion of a model dimension by model dimension matrix is needed. To
        # speed up this computation we calculate the following instead:
        # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
        new_step = array_ops.expand_dims(
            array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
        head = math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(sigma_sqrt_inv),
                    math_ops.matmul(u,
                                    math_ops.matmul(m,
                                                    moment1_2d,
                                                    transpose_a=True),
                                    transpose_a=True))))

        # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
        # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
        # Woodbury's identity.
        # For full derivation please see paper at
        # https://arxiv.org/pdf/1806.02958.pdf
        tail = moment1_2d - math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(
                        math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                        sigma)),
                    math_ops.matmul(u,
                                    math_ops.matmul(
                                        m, moment1_2d, transpose_a=True),
                                    transpose_a=True))))
        scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

        update_new_step = control_flow_ops.cond(
            sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
            lambda: math_ops.add(new_step, head))

        # Update each variable.
        update_step = []
        for var in self._variables:
            dim = self.shape_dict[var.name]
            start_index = self.index_dict[var.name]
            end_index = start_index + dim
            var_update_correct_shape = array_ops.reshape(
                update_new_step[start_index:end_index], var.get_shape())
            var_updated = state_ops.assign_sub(var,
                                               lr * var_update_correct_shape)
            update_step.append(var_updated)

        return control_flow_ops.group(update_step)
Example No. 15
 def testFloorModFloat(self):
     nums, divs = self.floatTestData()
     with self.cached_session():
         tf_result = math_ops.floormod(nums, divs).eval()
         np_result = nums % divs
         self.assertAllEqual(tf_result, np_result)
Example No. 16
 def __mod__(self, other):
     return math_ops.floormod(self, other)
Example No. 17
 def testFloorModFloat(self):
     nums, divs = self.floatTestData()
     tf_result = math_ops.floormod(nums, divs)
     np_result = nums % divs
     self.assertAllEqual(tf_result, np_result)
Example No. 18
 def __rmod__(self, other):
   return math_ops.floormod(other, self)
Example No. 19
 def __rmod__(self, other):
     return math_ops.floormod(other, self)
Example No. 20
 def __mod__(self, other):
   return math_ops.floormod(self, other)
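
These __mod__ / __rmod__ overloads are what route Python's % operator on tensors to floormod, whether the tensor appears on the left or the right of the operator. A short sketch of both dispatch directions:

import tensorflow as tf

t = tf.constant([5, -5])
print(t % 3)  # __mod__  -> tf.math.floormod(t, 3) == [2, 1]
print(7 % t)  # __rmod__ -> tf.math.floormod(7, t) == [2, -3]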