Example #1
    def testConvHiddenReluMemoryEfficient(self):
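        """Checks that the forget=True path matches forget=False exactly."""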
        if tf.executing_eagerly():
            return  # don't run test in Eager mode

        batch = 3
        length = 23
        io_size = 16
        filter_size = 7
        x = np.random.rand(batch, length, io_size)
        dy = np.random.rand(batch, length, io_size)
        with self.session() as session:
            x = tf.to_float(x)
            dy = tf.to_float(dy)
            f1 = tf.get_variable("f1", [1, io_size, filter_size])
            f2 = tf.get_variable("f2", [1, filter_size, io_size])
            norm_scale, norm_bias = common_layers.layer_norm_vars(io_size)
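            # Both calls share the same weights. With forget=True the layer
            # recomputes intermediate activations during backprop.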
            y = common_layers.conv_hidden_relu_memory_efficient(
                x,
                filter_size,
                forget=False,
                test_vars=(f1, f2, norm_scale, norm_bias))
            y_forget = common_layers.conv_hidden_relu_memory_efficient(
                x,
                filter_size,
                forget=True,
                test_vars=(f1, f2, norm_scale, norm_bias))
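            # Backprop both outputs against the same upstream gradient dy.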
            dx, df1, df2, dnorm_scale, dnorm_bias = tf.gradients(
                ys=[y], xs=[x, f1, f2, norm_scale, norm_bias], grad_ys=[dy])
            dx_f, df1_f, df2_f, dnorm_scale_f, dnorm_bias_f = tf.gradients(
                ys=[y_forget],
                xs=[x, f1, f2, norm_scale, norm_bias],
                grad_ys=[dy])
            session.run(tf.global_variables_initializer())
            (y, y_forget, dx, df1, df2, dnorm_scale, dnorm_bias, dx_f, df1_f,
             df2_f, dnorm_scale_f, dnorm_bias_f) = session.run([
                 y, y_forget, dx, df1, df2, dnorm_scale, dnorm_bias, dx_f,
                 df1_f, df2_f, dnorm_scale_f, dnorm_bias_f
             ])
        self.assertAllClose(y, y_forget)
        self.assertAllClose(df2, df2_f)
        self.assertAllClose(df1, df1_f)
        self.assertAllClose(dnorm_scale, dnorm_scale_f)
        self.assertAllClose(dnorm_bias, dnorm_bias_f)
        self.assertAllClose(dx, dx_f)
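The test runs the same layer twice with shared weights and asserts that the forget=True path, which drops intermediate activations in the forward pass and recomputes them during backprop, reproduces the outputs and every gradient of the plain path. The sketch below illustrates that recompute-in-backward idea with tf.recompute_grad on a simplified stand-in block (TF 2.x eager APIs assumed; shapes mirror the test, but this is not the tensor2tensor implementation).

import numpy as np
import tensorflow as tf


def block(x, f1, f2):
    # Stand-in for the conv -> relu -> conv body; 1x1 convs become plain matmuls.
    h = tf.nn.relu(tf.einsum("bli,if->blf", x, f1))
    return tf.einsum("blf,fo->blo", h, f2)


x = tf.Variable(np.random.rand(3, 23, 16).astype(np.float32))  # batch, length, io_size
f1 = tf.Variable(tf.random.normal([16, 7]))                    # io_size, filter_size
f2 = tf.Variable(tf.random.normal([7, 16]))                    # filter_size, io_size

# Recompute activations during backprop instead of keeping them in memory.
memory_efficient = tf.recompute_grad(block)

with tf.GradientTape(persistent=True) as tape:
    y = block(x, f1, f2)
    y_forget = memory_efficient(x, f1, f2)

grads = tape.gradient(y, [x, f1, f2])
grads_forget = tape.gradient(y_forget, [x, f1, f2])

np.testing.assert_allclose(y.numpy(), y_forget.numpy(), rtol=1e-5, atol=1e-5)
for g, g_f in zip(grads, grads_forget):
    np.testing.assert_allclose(g.numpy(), g_f.numpy(), rtol=1e-5, atol=1e-5)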