Example #1
    def testStackingStreamStepRightContext(self):
        tf.random.set_seed(2021)
        batch_size, max_seqlen, input_dim, kernel = 2, 16, 8, 3
        left_context, right_context = 6, 3
        num_heads, ffn_dim = 2, 4
        stride = 1
        num_layers = 3
        num_groups = 2

        # Prepares inputs.
        np.random.seed(None)
        inputs = np.random.normal(
            0.1, 1, [batch_size, max_seqlen, input_dim]).astype(np.float32)
        print(f'np.sum(inputs): {np.sum(inputs)}')
        inputs = tf.convert_to_tensor(inputs)

        seqlen = np.random.randint(low=max_seqlen // 2,
                                   high=max_seqlen + 1,
                                   size=(batch_size, ),
                                   dtype=np.int32)
        print(f'seqlen: {seqlen}')

        seqlen = tf.convert_to_tensor(seqlen)
        paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

        p = conformer_layer.ConformerLayer.CommonParams(
            input_dim=input_dim,
            is_causal=True,
            layer_order='conv_before_mhsa',
            atten_num_heads=num_heads,
            atten_left_context=left_context,
            atten_right_context=right_context,
            use_relative_atten=False,
            fflayer_hidden_dim=ffn_dim,
            kernel_size=kernel)
        p.lconv_tpl.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params(
        ).Set(num_groups=num_groups, cumulative=True)
        p.params_init = py_utils.WeightInit.Xavier(scale=1.0, seed=0)

        ps = [p.Copy().Set(name=f'base{i}') for i in range(num_layers)]
        layers = [x.Instantiate() for x in ps]

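        # Reference outputs from the non-streaming stacked graph.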
        base_outputs = self._BuildStackingBaseGraph(layers, num_layers, inputs,
                                                    paddings)

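        # Streaming outputs from the same layers, built with the given stride.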
        outputs = self._BuildStackingStreamGraph(layers, num_layers, inputs,
                                                 paddings, stride)

        init_op = tf.global_variables_initializer()
        with self.session(use_gpu=False) as sess:
            sess.run(init_op)

            expected, actual = sess.run([base_outputs, outputs])
            print(f'expected: {repr(expected)}, {expected.shape}')
            print(f'actual: {repr(actual)}, {actual.shape}')
            print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
            print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
            self.assertAllClose(expected, actual, atol=2e-6, rtol=2e-6)
            self.assertEqual(tuple(expected.shape),
                             (batch_size, max_seqlen, input_dim))
Example #2
  def _GetInputs(self, batch_size, max_seqlen, input_dim, full_seq=False):
    # Prepares inputs.
    np.random.seed(None)
    if self.input_rank == 3:
      inputs = np.random.normal(
          0.5, 1, [batch_size, max_seqlen, input_dim]).astype(np.float32)
    else:
      assert self.input_rank == 4
      inputs = np.random.normal(
          0.5, 1, [batch_size, max_seqlen, 1, input_dim]).astype(np.float32)
    print(f'np.sum(inputs): {np.sum(inputs)}')
    inputs = tf.convert_to_tensor(inputs)

    if not full_seq:
      seqlen = np.random.randint(
          low=max_seqlen // 2,
          high=max_seqlen + 1,
          size=(batch_size,),
          dtype=np.int32)
    else:
      seqlen = np.full((batch_size,), max_seqlen, dtype=np.int32)
    print(f'seqlen: {seqlen}')

    seqlen = tf.convert_to_tensor(seqlen)
    paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)
    return inputs, paddings
Example #3
    def testStreamStep(self, testonly_skip_norm_layers=False, norm_type='ln'):
        with flagsaver.flagsaver(
                testonly_skip_norm_layers=testonly_skip_norm_layers
        ), cluster_factory.SetEval(True):
            assert norm_type in ('ln', 'gn')
            batch, max_seqlen, input_dim, kernel = 2, 8, 2, 3
            p = conformer_layer.LConvLayer.CommonParams(input_dim=input_dim,
                                                        is_causal=True,
                                                        kernel_size=kernel)
            if norm_type == 'ln':
                p.conv_norm_layer_tpl = lingvo_layers.LayerNorm.Params()
            else:
                p.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params().Set(
                    num_groups=2, cumulative=True)
            p.name = 'lconv'

            l = p.Instantiate()
            init_op = tf.global_variables_initializer()

            np.random.seed(None)
            inputs = np.random.normal(
                0.1, 0.5, [batch, max_seqlen, input_dim]).astype(np.float32)
            print(f'np.sum(inputs): {np.sum(inputs)}')
            inputs = tf.convert_to_tensor(inputs)

            seqlen = np.random.randint(low=1,
                                       high=max_seqlen + 1,
                                       size=(batch, ),
                                       dtype=np.int32)
            print(repr(seqlen))
            seqlen = tf.convert_to_tensor(seqlen)
            paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)
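            # Non-streaming reference output.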
            base_outputs, _ = l.FProp(l.theta, inputs, paddings)
            base_outputs *= tf.expand_dims(1. - paddings, -1)

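            # Streams the input one frame at a time and concatenates the
            # per-step outputs.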
            outputs = []
            state = l.zero_state(batch)
            for i in range(max_seqlen):
                output, _, state = l.StreamStep(
                    l.theta, inputs[:, i:(i + 1), :], paddings[:, i:(i + 1)],
                    state)
                outputs.append(output)
            # [b, t, d]
            outputs = tf.concat(outputs, axis=1)
            outputs *= tf.expand_dims(1. - paddings, -1)

            with self.session(use_gpu=False) as sess:
                sess.run(init_op)
                expected, actual = sess.run([base_outputs, outputs])
                print(repr(expected))
                print(repr(actual))
                print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
                print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
                self.assertAllClose(expected, actual)
Example #4
  def testCausalDepthwiseConv2DLayerStreamStep(self,
                                               testonly_skip_norm_layers=False):
    with flagsaver.flagsaver(
        testonly_skip_norm_layers=testonly_skip_norm_layers):
      batch_size, max_seqlen, channel = 2, 32, 3
      kernel, channel_multiplier = 5, 1
      params = conv_layers.CausalDepthwiseConv2DLayer.Params().Set(
          name='conv',
          filter_stride=[1, 1],
          filter_shape=[kernel, 1, channel, channel_multiplier],
          params_init=py_utils.WeightInit.Gaussian(0.1))

      conv_layer = params.Instantiate()
      init_op = tf.global_variables_initializer()

      np.random.seed(None)
      inputs = np.random.normal(
          0.5, 1, [batch_size, max_seqlen, 1, channel]).astype(np.float32)
      print(f'np.sum(inputs): {np.sum(inputs)}')
      inputs = tf.convert_to_tensor(inputs)
      seqlen = tf.random.uniform([batch_size],
                                 minval=1,
                                 maxval=max_seqlen + 1,
                                 dtype=tf.int32)
      input_padding = py_utils.PaddingsFromLengths(seqlen, max_seqlen)
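      # Non-streaming reference output.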
      base_outputs, _ = conv_layer.FProp(conv_layer.theta, inputs,
                                         input_padding)
      base_outputs *= tf.reshape(1. - input_padding,
                                 [batch_size, max_seqlen, 1, 1])

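      # Streams the input one frame at a time and concatenates the per-step
      # outputs.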
      outputs = []
      state = conv_layer.zero_state(batch_size)
      for i in range(0, max_seqlen):
        output, _, state = conv_layer.StreamStep(conv_layer.theta,
                                                 inputs[:, i:(i + 1), :, :],
                                                 input_padding[:, i:(i + 1)],
                                                 state)
        outputs.append(output)
      # [b, t, 1, c * channel_multiplier]
      outputs = tf.concat(outputs, axis=1)
      outputs *= tf.reshape(1. - input_padding, [batch_size, max_seqlen, 1, 1])

      with self.session(use_gpu=True) as sess:
        sess.run(init_op)
        expected, actual = sess.run([base_outputs, outputs])
        print(repr(expected))
        print(repr(actual))
        print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
        print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
        self.assertAllClose(expected, actual)
Example #5
  def _TestLeadingPaddingsHelper(self, stride=1):
    """Tests leading paddings case, useful for local atten with right ctx."""
    batch, max_seqlen, channel = 2, 16, 2
    kernel, channel_multiplier = 3, 2

    p = conv_layers.CausalDepthwiseConv2DLayer.Params().Set(
        name='conv',
        filter_stride=[1, 1],
        filter_shape=[kernel, 1, channel, channel_multiplier],
        params_init=py_utils.WeightInit.Gaussian(0.1))

    l = p.Instantiate()
    init_op = tf.global_variables_initializer()

    np.random.seed(None)
    inputs = np.random.normal(0.1, 0.5, [batch, max_seqlen, 1, channel]).astype(
        np.float32)
    print(f'np.sum(inputs): {np.sum(inputs)}')
    inputs_t = tf.convert_to_tensor(inputs)

    # The upper bound is max_seqlen - 1, so every example in the batch has at
    # least one padded position.
    seqlen = np.random.randint(
        low=1, high=max_seqlen, size=(batch,), dtype=np.int32)
    print(f'seqlen: {seqlen}')
    paddings = py_utils.PaddingsFromLengths(
        tf.convert_to_tensor(seqlen), max_seqlen)

    shift_inputs = np.array(inputs)
    for i in range(batch):
      shift_inputs[i] = np.roll(shift_inputs[i], max_seqlen - seqlen[i], axis=0)
    shift_inputs_t = tf.convert_to_tensor(shift_inputs)

    # Per example, has the same number of padded positions as `paddings` above,
    # but placed at the front (leading) instead of at the end.
    leading_paddings = 1 - py_utils.PaddingsFromLengths(
        max_seqlen - tf.convert_to_tensor(seqlen), max_seqlen)

    def expand_pad(pad):  # pylint:disable=invalid-name
      return py_utils.AppendDims(pad, 2)

    def stream(l, inputs, paddings):  # pylint:disable=invalid-name
      state = l.zero_state(batch)
      all_outs = []
      for i in range(max_seqlen // stride):
        step_inputs = inputs[:, stride * i:stride * (i + 1)]
        step_paddings = paddings[:, stride * i:stride * (i + 1)]
        output, _, state = l.StreamStep(l.theta, step_inputs, step_paddings,
                                        state)
        all_outs.append(output)
      all_outs = tf.concat(all_outs, axis=1)
      return all_outs * (1. - expand_pad(paddings))

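    # Streaming with trailing paddings (baseline) and with leading paddings
    # should produce matching outputs once the latter are rolled back into
    # alignment below.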
    base_outs = stream(l, inputs_t, paddings)
    actual_outs = stream(l, shift_inputs_t, leading_paddings)

    with self.session(use_gpu=False) as sess:
      sess.run(init_op)
      expected, actual = sess.run([base_outs, actual_outs])
      for i in range(batch):
        actual[i] = np.roll(actual[i], -(max_seqlen - seqlen[i]), axis=0)
      print(f'expected: {repr(expected)}')
      print(f'actual: {repr(actual)}')
      print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
      print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
      self.assertAllClose(expected, actual)
Example #6
    def testStreamStep(self,
                       testonly_skip_norm_layers=False,
                       norm_type='ln',
                       num_groups=2,
                       stride=1,
                       layer_order='conv_before_mhsa',
                       has_lconv='depthwise',
                       has_fflayer_start=True,
                       right_context=0):
        assert norm_type in ('ln', 'gn'), norm_type
        with flagsaver.flagsaver(
                testonly_skip_norm_layers=testonly_skip_norm_layers
        ), cluster_factory.SetEval(True):
            batch, max_seqlen, input_dim, kernel = 2, 16, 8, 3
            assert max_seqlen % stride == 0

            if layer_order == 'mhsa':
                kernel = None
            num_heads, left_context, ffn_dim = 2, 3, 4
            p = conformer_layer.ConformerLayer.CommonParams(
                input_dim=input_dim,
                is_causal=True,
                atten_num_heads=num_heads,
                atten_left_context=left_context,
                atten_right_context=right_context,
                use_relative_atten=False,
                fflayer_hidden_dim=ffn_dim,
                kernel_size=kernel,
                layer_order=layer_order)
            if norm_type == 'ln':
                p.lconv_tpl.conv_norm_layer_tpl = lingvo_layers.LayerNorm.Params(
                )
            else:
                p.lconv_tpl.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params(
                ).Set(num_groups=num_groups, cumulative=True)
            if not has_lconv:
                p.lconv_tpl = None
            elif has_lconv == 'conv2d':
                p.lconv_tpl.depthwise_conv_tpl = (
                    conv_layers_with_time_padding.CausalConv2DLayerWithPadding.
                    Params())
            else:
                assert has_lconv == 'depthwise'
            if not has_fflayer_start:
                p.fflayer_start_tpl = None
            p.name = 'conformer'

            l = p.Instantiate()
            init_op = tf.global_variables_initializer()

            np.random.seed(None)
            inputs = 5 * np.random.normal(
                0.1, 0.5, [batch, max_seqlen, input_dim]).astype(np.float32)
            print(f'np.sum(inputs): {np.sum(inputs)}')
            inputs = tf.convert_to_tensor(inputs)

            seqlen = np.random.randint(low=1,
                                       high=max_seqlen + 1,
                                       size=(batch, ),
                                       dtype=np.int32)
            print(f'seqlen: {seqlen}')
            seqlen = tf.convert_to_tensor(seqlen)
            paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

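            # Non-streaming reference output.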
            base_output_map = l.FProp(
                l.theta, py_utils.NestedMap(features=inputs,
                                            paddings=paddings))
            base_outputs = base_output_map.features
            base_outputs *= tf.expand_dims(1. - paddings, -1)

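            # Streams the input `stride` frames at a time; the extra trailing
            # iterations feed all-padding steps to flush the attention right
            # context.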
            outputs = []
            state = l.zero_state(batch)
            for i in range(max_seqlen // stride +
                           int(math.ceil(right_context / stride))):
                if i < max_seqlen // stride:
                    step_inputs = inputs[:, stride * i:stride * (i + 1)]
                    step_paddings = paddings[:, stride * i:stride * (i + 1)]
                else:
                    step_inputs = tf.zeros_like(inputs[:, 0:stride])
                    step_paddings = tf.ones_like(paddings[:, 0:stride])
                output, _, state = l.StreamStep(l.theta, step_inputs,
                                                step_paddings, state)
                outputs.append(output)

            outputs = tf.concat(outputs, axis=1)
            outputs = outputs[:, right_context:][:, :max_seqlen]
            outputs *= tf.reshape(1. - paddings, [batch, max_seqlen, 1])

            with self.session(use_gpu=False) as sess:
                sess.run(init_op)
                expected, actual = sess.run([base_outputs, outputs])
                print(repr(expected))
                print(repr(actual))
                print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
                print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
                tol = 3.e-6 if testonly_skip_norm_layers else 2.e-5
                self.assertAllClose(expected, actual, atol=tol, rtol=tol)
Example #7
  def testStreamStep(self,
                     testonly_skip_norm_layers=False,
                     norm_type='ln',
                     num_groups=2,
                     stride=1,
                     layer_order='conv_before_mhsa',
                     has_lconv=True,
                     has_fflayer_start=True):
    assert norm_type in ('ln', 'gn'), norm_type
    with flagsaver.flagsaver(testonly_skip_norm_layers=testonly_skip_norm_layers
                            ), cluster_factory.SetEval(True):
      batch, max_seqlen, input_dim, kernel = 2, 16, 8, 3
      if layer_order == 'mhsa':
        kernel = None
      num_heads, left_context, ffn_dim = 2, 3, 4
      p = conformer_layer.ConformerLayer.CommonParams(
          input_dim=input_dim,
          is_causal=True,
          atten_num_heads=num_heads,
          atten_left_context=left_context,
          atten_right_context=0,
          use_relative_atten=False,
          fflayer_hidden_dim=ffn_dim,
          kernel_size=kernel,
          layer_order=layer_order)
      if norm_type == 'ln':
        p.lconv_tpl.conv_norm_layer_tpl = layers.LayerNorm.Params()
      else:
        p.lconv_tpl.conv_norm_layer_tpl = bn_layers.GroupNormLayer.Params().Set(
            num_groups=num_groups, cumulative=True)
      if not has_lconv:
        p.lconv_tpl = None
      if not has_fflayer_start:
        p.fflayer_start_tpl = None
      p.name = 'conformer'

      l = p.Instantiate()
      init_op = tf.global_variables_initializer()

      np.random.seed(None)
      inputs = 5 * np.random.normal(
          0.1, 0.5, [batch, max_seqlen, input_dim]).astype(np.float32)
      print(f'np.sum(inputs): {np.sum(inputs)}')
      inputs = tf.convert_to_tensor(inputs)

      seqlen = np.random.randint(
          low=1, high=max_seqlen + 1, size=(batch,), dtype=np.int32)
      print(repr(seqlen))
      seqlen = tf.convert_to_tensor(seqlen)
      paddings = py_utils.PaddingsFromLengths(seqlen, max_seqlen)

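      # Non-streaming reference output.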
      base_output_map = l.FProp(
          l.theta, py_utils.NestedMap(features=inputs, paddings=paddings))
      base_outputs = base_output_map.features
      base_outputs *= tf.expand_dims(1. - paddings, -1)

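      # Streams the input `stride` frames at a time and concatenates the
      # per-step outputs.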
      outputs = []
      state = l.zero_state(batch)
      for i in range(0, max_seqlen, stride):
        output, _, state = l.StreamStep(l.theta, inputs[:, i:(i + stride), :],
                                        paddings[:, i:(i + stride)], state)
        outputs.append(output)
      # [b, t, d]
      outputs = tf.concat(outputs, axis=1)
      outputs *= tf.expand_dims(1. - paddings, -1)

      with self.session(use_gpu=False) as sess:
        sess.run(init_op)
        expected, actual = sess.run([base_outputs, outputs])
        print(repr(expected))
        print(repr(actual))
        print(f'np.sum(np.abs(expected)): {np.sum(np.abs(expected))}')
        print(f'np.sum(np.abs(actual)): {np.sum(np.abs(actual))}')
        tol = 2.e-6 if testonly_skip_norm_layers else 2.e-5
        self.assertAllClose(expected, actual, atol=tol, rtol=tol)