Example #1
    def model(self, feats, labels):
        ''' Build the model. '''
        x = self.resnet(feats)

        with tf.variable_scope("avg_pooling"):
            batch_t = tf.shape(x)[0]
            time_t = tf.shape(x)[1]
            feat, channel = x.shape.as_list()[2:]
            x = tf.reshape(x, [batch_t, time_t, feat * channel])
            x = self.pooling_layer(x, pooling_type='average')

        with tf.variable_scope("output_layer"):
            shape = x.shape.as_list()
            shape = shape[-1]
            hidden_dims = self.params().embedding_size
            y = x
            y = common_layers.linear(y,
                                     'dense-matmul', [shape, hidden_dims],
                                     has_bias=True)
            y = tf.layers.batch_normalization(y,
                                              axis=-1,
                                              momentum=0.99,
                                              training=self.train,
                                              name='dense-bn')
            embedding = y
            dense_output = y

        logits = self.logits_layer(dense_output, labels)
        model_outputs = {'logits': logits, 'embeddings': embedding}
        return model_outputs
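
For context, the `pooling_layer` call above is assumed to reduce over the time axis when `pooling_type='average'`; a minimal sketch of that behavior (an assumption, not the project's actual helper):

import tensorflow as tf

def average_pooling(x):
    # x: [batch, time, feat]; mean over the time axis -> [batch, feat].
    return tf.reduce_mean(x, axis=1)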
Example #2
def cross_entropy(logits,
                  labels,
                  input_length=None,
                  label_length=None,
                  smoothing=0.0,
                  reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    '''
    Cross-entropy loss for classification and sequence classification.
    :param label_length: for sequence tasks, the length of the target
        sequence, e.g. `a b c </s>` has length 4.
    '''
    del input_length

    onehot_labels = tf.cond(pred=tf.equal(
        tf.rank(logits) - tf.rank(labels), 1),
                            true_fn=lambda: tf.one_hot(
                                labels, tf.shape(logits)[-1], dtype=tf.int32),
                            false_fn=lambda: labels)

    if label_length is not None:
        max_len = tf.shape(logits)[1]
        weights = utils.len_to_mask(label_length, max_len)
    else:
        weights = 1.0

    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits,
                                           weights=weights,
                                           label_smoothing=smoothing,
                                           reduction=reduction)

    return loss
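
A hypothetical usage sketch for the plain classification branch, where `labels` is one rank lower than `logits` and is therefore one-hot encoded inside the function:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, 0.1],
                      [0.2, 1.5, 0.3]])  # [batch=2, num_classes=3]
labels = tf.constant([0, 1])  # rank difference of 1 triggers the one-hot branch
loss = cross_entropy(logits, labels, smoothing=0.1)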
Example #3
    def test_linear(self):
        '''test linear'''
        inputs = tf.random_uniform(shape=[4, 5], dtype=tf.float32,
                                   maxval=1.0)  # A 2D tensor
        shape = [5, 4]
        output = cl.linear(inputs, 'test_linear0', shape)
        output_shape = [4, 4]
        self.assertAllEqual(tf.shape(output), output_shape)

        inputs = tf.random_uniform(shape=[2, 4, 5],
                                   dtype=tf.float32,
                                   maxval=1.0)  # A 3D tensor
        shape = [5, 4]
        output = cl.linear(inputs, 'test_linear1', shape)
        output_shape = [2, 4, 4]
        self.assertAllEqual(tf.shape(output), output_shape)

        # A 4D tensor [B, C, H, W]
        inputs = tf.random_uniform(shape=[2, 3, 4, 5],
                                   dtype=tf.float32,
                                   maxval=1.0)
        shape = [5, 4]
        output = cl.linear(inputs, 'test_linear2', shape)
        output_shape = [2, 3, 4, 4]
        self.assertAllEqual(tf.shape(output), output_shape)
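
The test only asserts output shapes, so `cl.linear` presumably projects the last dimension with a learned matrix; a minimal sketch consistent with those shapes (an assumption, not the library's actual implementation):

import tensorflow as tf

def linear_sketch(x, name, shape, has_bias=False):
    # Project the last dimension: [..., shape[0]] -> [..., shape[1]].
    with tf.variable_scope(name):
        w = tf.get_variable('w', shape, dtype=tf.float32)
        y = tf.tensordot(x, w, axes=1)
        if has_bias:
            b = tf.get_variable('b', [shape[1]],
                                initializer=tf.zeros_initializer())
            y = y + b
    return y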
Example #4
    def test_tdnn(self):
        '''test tdnn'''
        # A 3D tensor [batch, in_width, in_channels]
        inputs = tf.random_uniform(shape=[2, 5, 3],
                                   dtype=tf.float32,
                                   maxval=1.0)
        in_dim = inputs.get_shape().as_list()[2]
        out_dim = 4
        context = [-2, -1, 0, 1, 2]
        output = cl.tdnn(inputs,
                         'test_tdnn0',
                         in_dim,
                         context,
                         out_dim,
                         method='splice_layer')
        out_shape = [2, 5, 4]
        self.assertAllEqual(tf.shape(output), out_shape)

        context = 2
        #output = cl.tdnn(inputs, 'test_tdnn1', in_dim, context, out_dim, method='splice_op')
        #self.assertAllEqual(tf.shape(output), out_shape)

        output = cl.tdnn(inputs,
                         'test_tdnn2',
                         in_dim,
                         context,
                         out_dim,
                         method='conv1d')
        self.assertAllEqual(tf.shape(output), out_shape)
Example #5
    def test_attention(self):
        '''test attention'''
        # A 3D tensor [B, T, D]
        inputs = tf.random_uniform(shape=[2, 100, 512],
                                   dtype=tf.float32,
                                   maxval=1.0)
        attention_size = 256
        output, alpha = cl.attention(inputs,
                                     attention_size,
                                     return_alphas=True)
        output_shape = [2, 512]
        alpha_shape = [2, 100, 1]
        self.assertAllEqual(tf.shape(output), output_shape)
        self.assertAllEqual(tf.shape(alpha), alpha_shape)
Example #6
def cut_or_padding(origin_t, new_length, padding_token=0):
    """
  If too long, cut the tensor; else pad the tensor.
  origin_t: [batch_size, time_steps_1] or [time_steps_1]
  new_t: [batch_size, time_steps_2] or [time_steps_2]
  """

    if len(origin_t.get_shape()) == 1:
        dim = 1
        cur_length = tf.shape(origin_t)[0]
    elif len(origin_t.get_shape()) == 2:
        dim = 2
        cur_length = tf.shape(origin_t)[1]
    else:
        raise ValueError("origin_t should be a tensor with rank 1 or 2.")

    def cut_tensor():
        if dim == 1:
            new_t = origin_t[:new_length]
        else:
            new_t = origin_t[:, :new_length]
        return new_t

    def pad_tail_tensor():
        if dim == 1:
            shape = tf.constant([1, 2])
            indices = tf.constant([[0, 1]])
        else:
            shape = tf.constant([2, 2])
            indices = tf.constant([[1, 1]])
        updates = [new_length - cur_length]
        paddings = tf.scatter_nd(indices, updates, shape)
        new_t = tf.pad(origin_t,
                       paddings,
                       "CONSTANT",
                       constant_values=padding_token)
        return new_t

    new_t = tf.cond(cur_length < new_length,
                    true_fn=pad_tail_tensor,
                    false_fn=cut_tensor)

    if dim == 1:
        new_t.set_shape([new_length])
    else:
        new_t.set_shape([origin_t.get_shape()[0], new_length])

    return new_t
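
A hypothetical usage sketch covering both branches:

import tensorflow as tf

t2d = tf.constant([[1, 2, 3], [4, 5, 6]])
padded = cut_or_padding(t2d, 5)  # too short: tail-padded to shape [2, 5]

t1d = tf.range(7)
cut = cut_or_padding(t1d, 5)  # too long: cut to shape [5]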
Example #7
    def call(self, inputs, training=None, mask=None):
        batch_size = tf.shape(inputs)[0]
        W_3d = tf.tile(tf.expand_dims(self.W, axis=0),
                       tf.stack([batch_size, 1, 1]))
        # [batch_size, steps, features]
        input_projection = tf.matmul(inputs, W_3d)

        if self.use_bias:
            input_projection += self.b

        input_projection = tf.tanh(input_projection)

        # [batch_size, steps, 1]
        similaritys = tf.reduce_sum(tf.multiply(input_projection,
                                                self.attention_context_vector),
                                    axis=2,
                                    keep_dims=True)

        # [batch_size, steps, 1]
        if mask is not None:
            attention_weights = masked_softmax(similaritys, mask, axis=1)
        else:
            attention_weights = tf.nn.softmax(similaritys, axis=1)

        # [batch_size, features]
        attention_output = tf.reduce_sum(tf.multiply(inputs,
                                                     attention_weights),
                                         axis=1)
        return attention_output
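
`masked_softmax` is referenced but not shown; a common formulation, assuming a `(batch, steps)` 0/1 mask with 1 marking valid positions:

import tensorflow as tf

def masked_softmax(scores, mask, axis=1):
    # scores: [batch, steps, 1]; push padded steps toward -inf so their
    # attention weights come out as (numerically) zero.
    mask = tf.expand_dims(tf.cast(mask, scores.dtype), axis=-1)
    scores = scores + (1.0 - mask) * -1e9
    return tf.nn.softmax(scores, axis=axis)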
Example #8
    def test_jieba_cut_op_no_file(self):
        ''' test jieba '''
        graph = tf.Graph()
        with graph.as_default():
            sentence_in = tf.placeholder(dtype=tf.string,
                                         shape=[None],
                                         name="sentence_in")

            sentence_out = self.build_op_no_file(sentence_in)
            shape_op = tf.shape(sentence_out)

            with self.cached_session(use_gpu=False, force_gpu=False) as sess:
                # self.assertShapeEqual(tf.shape(sentence_in), tf.shape(sentence_out))
                sentence_out_res = test_one(sess, sentence_out,
                                            {sentence_in: ["我爱北京天安门"]})
                self.assertEqual("我 爱 北京 天安门",
                                 sentence_out_res[0].decode("utf-8"))
                sentence_out_res = test_one(sess, sentence_out,
                                            {sentence_in: ["吉林省长春药店"]})
                self.assertEqual("吉林省 长春 药店",
                                 sentence_out_res[0].decode("utf-8"))
                sentence_out_res, shape_res = test_one(
                    sess, [sentence_out, shape_op],
                    {sentence_in: ["吉林省长春药店", "南京市长江大桥"]})
                self.assertEqual(
                    "吉林省 长春 药店\n南京市 长江大桥", "\n".join([
                        one_sen.decode("utf-8") for one_sen in sentence_out_res
                    ]))
                logging.info(f"shape: {shape_res}")
                self.assertAllEqual(shape_res, [2])
Example #9
    def call(self, audio_data, sample_rate=None):
        """
    Caculate mfcc features of audio data.
    :param audio_data: the audio signal from which to compute spectrum.
                       Should be an (1, N) tensor.
    :param sample_rate: the samplerate of the signal we working with.
    :return: A float tensor of size (num_channels, num_frames, num_frequencies)
            containing mfcc features of every frame in speech.
    """
        p = self.config
        with tf.name_scope('mfcc'):

            if sample_rate is None:
                sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

            assert_op = tf.assert_equal(tf.constant(p.sample_rate),
                                        tf.cast(sample_rate, dtype=tf.int32))
            with tf.control_dependencies([assert_op]):

                fbank_feats = self.fbank(audio_data, sample_rate)
                sample_rate = tf.cast(sample_rate, dtype=tf.int32)
                shape = tf.shape(fbank_feats)
                nframe = shape[0]
                nfbank = shape[1]
                fbank_feats = tf.reshape(fbank_feats, (1, nframe, nfbank))
                framepow_feats = self.framepow(audio_data, sample_rate)
                mfcc = py_x_ops.mfcc(fbank_feats,
                                     framepow_feats,
                                     sample_rate,
                                     use_energy=p.use_energy,
                                     cepstral_lifter=p.cepstral_lifter,
                                     coefficient_count=p.coefficient_count)
                return mfcc
Example #10
    def scaled_dot_product_attention(q, k, v, mask):
        """
    The implementation of scaled attention.
    Args:
      v: (batch_size, seq_len_v, hidden_size)
      k: (batch_size, seq_len_k, hidden_size)
      q: (batch_size, seq_len_q, hidden_size)
      mask: (batch_size, seq_len_q, seq_len_k)

    Returns:
      output: (batch_size, seq_len_q, hidden_size)
      attention_weights: (batch_size, num_heads, seq_len_q, seq_len_k)
    """

        matmul_qk = tf.matmul(
            q, k, transpose_b=True)  # (batch_size, seq_len_q, seq_len_k)

        # Scaled
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

        # Masked
        if mask is not None:
            scaled_attention_logits += (mask * -1e9)

        # Normalized
        attention_weights = tf.nn.softmax(
            scaled_attention_logits,
            axis=-1)  # (batch_size, seq_len_q, seq_len_k)

        # Weighted sum
        output = tf.matmul(attention_weights,
                           v)  # (batch_size, seq_len_q, depth_v)

        return output, attention_weights
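
A hypothetical toy invocation, assuming the method is exposed as a static function; with no mask, every attention row is a distribution over the keys:

import tensorflow as tf

q = tf.random_uniform([2, 4, 8])  # (batch, seq_len_q, hidden_size)
k = tf.random_uniform([2, 4, 8])
v = tf.random_uniform([2, 4, 8])
out, attn = scaled_dot_product_attention(q, k, v, mask=None)
# out: (2, 4, 8); tf.reduce_sum(attn, axis=-1) evaluates to all ones.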
Example #11
def accuracy(logits, labels):
    ''' Accuracy metric.
    params:
      logits: [B, ..., D]
      labels: [B, ...]
    return:
      accuracy tensor
    '''
    with tf.name_scope('accuracy'):
        assert_rank = tf.assert_equal(tf.rank(logits), tf.rank(labels) + 1)
        assert_shape = tf.assert_equal(tf.shape(logits)[:-1], tf.shape(labels))
        with tf.control_dependencies([assert_rank, assert_shape]):
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int64)
            labels = tf.cast(labels, tf.int64)
            return tf.reduce_mean(
                tf.cast(tf.equal(predictions, labels), dtype=tf.float32))
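
A hypothetical sanity check: two of the three argmax predictions match the labels, so the metric evaluates to 2/3:

import tensorflow as tf

logits = tf.constant([[9., 0., 0., 0.],
                      [0., 9., 0., 0.],
                      [0., 0., 9., 0.]])  # argmax -> [0, 1, 2]
labels = tf.constant([0, 1, 3])
acc = accuracy(logits, labels)  # -> 2/3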
Example #12
    def tdnn_block(self, inputs):
        ''' TDNN layers. '''
        if 'tdnn_method' in self.netconf:
            tdnn_method = self.netconf['tdnn_method']
        else:
            # Runs faster and supports discrete contexts, for now.
            tdnn_method = 'splice_layer'
        tdnn_contexts = self.netconf['tdnn_contexts']
        logging.info("tdnn_contexts : {}".format(tdnn_contexts))
        tdnn_dims = self.netconf['tdnn_dims']
        logging.info("tdnn_dims : {}".format(tdnn_dims))

        layer_num = len(tdnn_contexts)
        assert layer_num == len(tdnn_dims)

        channels = [self.input_channels] + tdnn_dims
        logging.info("tdnn_channels : {}".format(channels))

        input_h_t = tf.shape(inputs)[1]
        input_w = inputs.shape[2]
        input_c = inputs.shape[3]
        if tdnn_method == 'conv1d':
            # NHWC -> NW'C, W' = H * W
            inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
            last_w = channels[0]
        else:
            inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
            last_w = input_w * input_c

        downsample_input_len = self.input_len
        with tf.variable_scope('tdnn'):
            x = tf.identity(inputs)
            for index in range(layer_num):
                unit_name = 'unit-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    tdnn_name = 'tdnn-' + str(index + 1)
                    x = common_layers.tdnn(x,
                                           tdnn_name,
                                           last_w,
                                           tdnn_contexts[index],
                                           channels[index + 1],
                                           has_bias=True,
                                           method=tdnn_method)
                    last_w = channels[index + 1]
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        bn_name = 'bn' + str(index + 1)
                        x = tf.layers.batch_normalization(x,
                                                          axis=-1,
                                                          momentum=0.9,
                                                          training=self.train,
                                                          name=bn_name)
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(x,
                                              self.netconf['dropout_rate'],
                                              training=self.train)
                    # TDNN layers do not downsample; the input length is unchanged.
                    downsample_input_len = downsample_input_len

        return x, downsample_input_len
Example #13
    def test_embedding_look_up(self):
        '''test embedding look up'''
        text_inputs = [0, 1, 2]
        vocab_size = 3
        embedding_size = 512
        output = cl.embedding_look_up(text_inputs, vocab_size, embedding_size)
        output_shape = [3, 512, 1]
        self.assertAllEqual(tf.shape(output), output_shape)
Example #14
def shape_list(tensor):
    """Return list of dims, statically where possible."""
    tensor = tf.convert_to_tensor(tensor)

    if tensor.get_shape().dims is None:
        return tf.shape(tensor)

    static = tensor.get_shape().as_list()
    shape = tf.shape(tensor)

    ret = []
    for i, _ in enumerate(static):
        dim = static[i]
        if dim is None:
            dim = shape[i]
        ret.append(dim)
    return ret
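
A hypothetical usage sketch; static dims come back as Python ints, unknown dims as scalar tensors:

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 10])
dims = shape_list(x)
# dims[0] is a scalar tensor (tf.shape(x)[0]); dims[1] is the Python int 10.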
Example #15
def splice(feat, left_context, right_context):
    '''
    Splice frames with left and right context.
    param: feat, tf.float32, [batch, time, feat]
    return: feat, tf.float32, [batch, time, feat * (left_context + 1 + right_context)]
    reference:
      https://github.com/kaldi-asr/kaldi/src/feat/feature-functions.cc#L205:6
    '''
    def _loop_continue(time, end_time, context, unused_left_context,
                       right_context, unused_output_tas):
        del unused_output_tas
        del unused_left_context
        return time < end_time

    def _loop_body(time, end_time, context, left_context, right_context,
                   output_tas):
        shape = tf.shape(context)
        B, _, D = shape[0], shape[1], shape[2]
        N = (1 + left_context + right_context) * D

        new_feat = context[:, time:time + left_context + 1 + right_context, :]
        new_feat = tf.reshape(new_feat, [B, N])
        new_output_tas = output_tas.write(time, new_feat)
        return (time + 1, end_time, context, left_context, right_context,
                new_output_tas)

    with tf.control_dependencies([
            tf.assert_greater_equal(left_context, 0),
            tf.assert_greater_equal(right_context, 0)
    ]):
        T = tf.shape(feat)[1]
        output_tas = _new_tensor_array('splice_feat_ta', T, dtype=tf.float32)
        time = tf.constant(0, tf.int32)
        first = tf.tile(feat[:, 0:1, :], [1, left_context, 1])
        last = tf.tile(feat[:, -1:, :], [1, right_context, 1])
        context = tf.concat([first, feat], axis=1)
        context = tf.concat([context, last], axis=1)

        loop_vars = (time, T, context, left_context, right_context, output_tas)

        parallel_iterations = 10
        shape_invariants = tf.nest.map_structure(
            lambda t: tf.TensorShape(None), loop_vars)

        (time, end_time, context, left_context, right_context,
         output_tas) = tf.while_loop(_loop_continue,
                                     _loop_body,
                                     loop_vars=loop_vars,
                                     shape_invariants=shape_invariants,
                                     parallel_iterations=parallel_iterations,
                                     swap_memory=False)
        del context
        del left_context
        del right_context

        batch_spliced_feats = output_tas.stack()
        batch_spliced_feats = tf.transpose(batch_spliced_feats, [1, 0, 2])
    return batch_spliced_feats
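
A hypothetical usage sketch, assuming the module's `_new_tensor_array` helper is available; with 2 frames of context on each side, the feature dimension grows to 3 * (2 + 1 + 2) = 15:

import tensorflow as tf

feat = tf.random_uniform([2, 6, 3])  # [batch, time, feat]
spliced = splice(feat, left_context=2, right_context=2)  # -> [2, 6, 15]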
Example #16
def shape_list(x):
    """Return list of dims, statically where possible."""
    x = tf.convert_to_tensor(x)

    # If unknown rank, return dynamic shape
    if x.get_shape().dims is None:
        return tf.shape(x)

    static = x.get_shape().as_list()
    shape = tf.shape(x)

    ret = []
    for i, _ in enumerate(static):
        dim = static[i]
        if dim is None:
            dim = shape[i]
        ret.append(dim)
    return ret
Example #17
    def test_chinese_word(self):
        config = utils.load_config(self.config_file)
        class_num = config["data"]["task"]["classes"]["num_classes"]
        data_config = config["data"]
        task_config = data_config["task"]
        task_config["language"] = "chinese"
        task_config["split_by_space"] = False
        task_config["use_word"] = True
        data_config = config["data"]
        data_config["train"]["paths"] = \
          ["egs/mock_text_cls_data/text_cls/v1/data/train.chinese_word.txt"]
        data_config["eval"]["paths"] = \
          ["egs/mock_text_cls_data/text_cls/v1/data/eval.chinese_word.txt"]
        data_config["infer"]["paths"] = \
          ["egs/mock_text_cls_data/text_cls/v1/data/test.chinese_word.txt"]
        task_config["text_vocab"] = \
          "egs/mock_text_cls_data/text_cls/v1/data/text_vocab.chinese_word.txt"
        task_config["need_shuffle"] = False
        config["model"]["split_token"] = ""
        task_config["preparer"]["reuse"] = False

        task = TextClsTask(config, utils.TRAIN)

        # test offline data
        data = task.dataset()
        self.assertTrue("input_x_dict" in data
                        and "input_x" in data["input_x_dict"])
        self.assertTrue("input_y_dict" in data
                        and "input_y" in data["input_y_dict"])
        with self.cached_session(use_gpu=False, force_gpu=False) as sess:
            sess.run(data["iterator"].initializer)
            res = sess.run([
                data["input_x_dict"]["input_x"],
                data["input_y_dict"]["input_y"]
            ])
            logging.debug(res[0][0])
            logging.debug(res[1][0])
            self.assertAllEqual(res[0][0][:5], [2, 0, 0, 0, 0])
            self.assertEqual(np.shape(res[1]), (32, class_num))

        # test online data
        export_inputs = task.export_inputs()
        self.assertTrue("export_inputs" in export_inputs
                        and "input_sentence" in export_inputs["export_inputs"])
        input_sentence = export_inputs["export_inputs"]["input_sentence"]
        input_x = export_inputs["model_inputs"]["input_x"]
        shape_op = tf.shape(input_x)

        with self.cached_session(use_gpu=False, force_gpu=False) as sess:
            res, shape_res = sess.run([input_x, shape_op],
                                      feed_dict={input_sentence: ["我很愤怒"]})
            logging.debug(res[0])
            logging.debug(np.shape(res[0]))
            logging.debug(f"shape: {shape_res}")
            self.assertAllEqual(shape_res, [1, 1024])
            self.assertAllEqual(res[0][:5], [4, 5, 0, 0, 0])
Example #18
    def _loop_body(time, end_time, context, left_context, right_context,
                   output_tas):
        shape = tf.shape(context)
        B, _, D = shape[0], shape[1], shape[2]
        N = (1 + left_context + right_context) * D

        new_feat = context[:, time:time + left_context + 1 + right_context, :]
        new_feat = tf.reshape(new_feat, [B, N])
        new_output_tas = output_tas.write(time, new_feat)
        return (time + 1, end_time, context, left_context, right_context,
                new_output_tas)
Example #19
    def test_maxpool(self):
        '''test maxpool'''
        inputs = tf.reshape(tf.range(25), shape=[1, 5, 5, 1])  # A 4D tensor
        ksize = [3, 3]
        strides = [1, 1]
        output = cl.max_pool(inputs, ksize, strides)
        output_shape = [1, 3, 3, 1]
        self.assertAllEqual(tf.shape(output), output_shape)

        output_true = tf.constant([[[[12], [13], [14]], [[17], [18], [19]],
                                    [[22], [23], [24]]]])
        self.assertAllEqual(output, output_true)
Example #20
    def _reshape_mask(mask):
        """
        Repeat the mask for multi-head attention.
        Input shape: (batch_size, steps)
        Output shape: (batch_size * head_num, steps)
        """
        if mask is None:
            return None
        seq_len = tf.shape(mask)[1]
        mask = tf.expand_dims(mask, axis=1)
        mask = tf.tile(mask, [1, self.head_num, 1])
        return tf.reshape(mask, shape=(-1, seq_len))
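
The closure reads `self.head_num` from the enclosing layer; an equivalent standalone form with `head_num` made explicit:

import tensorflow as tf

def reshape_mask(mask, head_num):
    # (B, T) -> (B * head_num, T): duplicate each sample's mask per head.
    if mask is None:
        return None
    seq_len = tf.shape(mask)[1]
    mask = tf.expand_dims(mask, axis=1)  # (B, 1, T)
    mask = tf.tile(mask, [1, head_num, 1])  # (B, H, T)
    return tf.reshape(mask, shape=(-1, seq_len))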
Example #21
    def test_conv2d(self):
        '''test conv2d'''
        inputs = tf.random_uniform(shape=[2, 5, 5, 3],
                                   dtype=tf.float32,
                                   maxval=1.0)  # A 4D tensor
        filter_size = [3, 3]
        in_channels = inputs.get_shape().as_list()[3]
        out_channels = 4
        strides = [1, 1]
        output = cl.conv2d(inputs, 'test_conv2d', filter_size, in_channels,
                           out_channels, strides)
        output_shape = [2, 5, 5, 4]
        self.assertAllEqual(tf.shape(output), output_shape)
Example #22
    def linear_block(self, x):
        '''
        Linear layer for dimension reduction.
        x: shape [batch, time, feat, channel]
        output: shape [b, t, f]
        '''
        batch_t = tf.shape(x)[0]
        time_t = tf.shape(x)[1]
        feat, channel = x.shape.as_list()[2:]
        linear_num = self.netconf['linear_num']

        if linear_num > 0:
            with tf.variable_scope('linear'):
                x = tf.reshape(x, [batch_t * time_t, feat * channel])

                if self.netconf['use_dropout']:
                    x = tf.layers.dropout(x,
                                          self.netconf['dropout_rate'],
                                          training=self.train)

                x = common_layers.linear(x, 'linear1',
                                         [feat * channel, linear_num])

                x = tf.nn.relu(x)

                if self.netconf['use_bn']:
                    bn_name = 'bn_linear'
                    x = tf.layers.batch_normalization(x,
                                                      axis=-1,
                                                      momentum=0.9,
                                                      training=self.train,
                                                      name=bn_name)

                x = tf.reshape(x, [batch_t, time_t, linear_num])
        else:
            logging.info('linear_num <= 0, only apply reshape.')
            x = tf.reshape(x, [batch_t, time_t, feat * channel])

        return x
Example #23
    def test_conv_pool(self):
        '''test conv pool'''
        # A 4D tensor [B, H, W, C]
        embedded_chars_expanded = tf.random_uniform(shape=[2, 7, 7, 1],
                                                    dtype=tf.float32,
                                                    maxval=1.0)
        filter_sizes = [3, 5]
        embedding_size = 3
        num_filters = 3
        sequence_length = 5
        output = cl.conv_pool(embedded_chars_expanded, filter_sizes,
                              embedding_size, num_filters, sequence_length)
        output_shape = [30, 6]
        self.assertAllEqual(tf.shape(output), output_shape)
Example #24
    def call(self, inputs, training=None, mask=None):
        """
    The implementation of Multi-headed attention.
    Args:
      inputs = (v, k, q)
      q: (batch_size, seq_len_q, hidden_size)
      k: (batch_size, seq_len_k, hidden_size)
      v: (batch_size, seq_len_v, hidden_size)
      mask: (batch_size, seq_len_q, seq_len_k)

    Returns:
      output: (batch_size, seq_len_q, hidden_size)
      attention_weights: (batch_size, num_heads, seq_len_q, seq_len_k)
    """
        q, k, v = inputs
        batch_size = tf.shape(q)[0]

        q = self.wq(q)  # (batch_size, seq_len_q, hidden_size)
        k = self.wk(k)  # (batch_size, seq_len_k, hidden_size)
        v = self.wv(v)  # (batch_size, seq_len_v, hidden_size)

        q = self.split_heads(
            q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(
            k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(
            v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
        scaled_attention, attention_weights = self.scaled_dot_product_attention(
            q, k, v, mask)

        scaled_attention = tf.transpose(
            scaled_attention,
            perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

        concat_attention = tf.reshape(
            scaled_attention,
            (batch_size, -1,
             self.hidden_size))  # (batch_size, seq_len_q, hidden_size)

        output = self.dense(
            concat_attention)  # (batch_size, seq_len_q, hidden_size)

        return output, attention_weights
Example #25
def delta_delta(feat, order=2):
    '''
    params:
      feat: a tensor of shape [nframe, nfbank] or [nframe, nfbank, 1]
    return: [nframe, nfbank, order + 1] (3 for the default order=2)
    '''
    feat = tf.cond(tf.equal(tf.rank(feat), 3),
                   true_fn=lambda: feat[:, :, 0],
                   false_fn=lambda: feat)

    shape = tf.shape(feat)
    # [nframe nfbank*3]
    nframe = shape[0]
    nfbank = shape[1]
    delta = py_x_ops.delta_delta(feat, order=order)
    feat_with_delta_delta = tf.reshape(delta, (nframe, nfbank, (order + 1)))
    return feat_with_delta_delta
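
A hypothetical usage sketch (the actual computation lives in the custom `py_x_ops.delta_delta` op): with the default `order=2`, 40-dim filterbanks come back with the static, first- and second-order coefficients stacked in the last axis:

import tensorflow as tf

fbank = tf.random_uniform([100, 40])  # [nframe, nfbank]
feats = delta_delta(fbank, order=2)  # -> [100, 40, 3]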
Example #26
def batch_extract_feature(waveforms, params):
    ''' waveforms: [batch, samples, audio_channels]
    return: features [batch, nframes, feat_size, channels]
    '''
    def _to_tensor_array(name, v, clear_after_read=None):
        ''' create TensorArray from v, of size batch.'''
        ta = tf.TensorArray(v.dtype,
                            batch,
                            name=name,
                            clear_after_read=clear_after_read)
        ta = ta.unstack(v)
        return ta

    def _loop_continue(time, inputs, unused_output_tas):
        del unused_output_tas
        batch = tf.shape(inputs)[0]
        return time < batch

    def _loop_body(time, inputs, output_tas):
        feat = extract_feature(inputs[time, ...], params)
        new_output_tas = output_tas.write(time, feat)
        return (time + 1, inputs, new_output_tas)

    batch = tf.shape(waveforms)[0]
    output_tas = _new_tensor_array('batch_feat', batch, dtype=tf.float32)
    time = tf.constant(0, tf.int32)
    loop_vars = (time, waveforms, output_tas)

    parallel_iterations = 10
    shape_invariants = tf.nest.map_structure(lambda t: tf.TensorShape(None),
                                             loop_vars)

    (time, inputs,
     output_tas) = tf.while_loop(_loop_continue,
                                 _loop_body,
                                 loop_vars=loop_vars,
                                 shape_invariants=shape_invariants,
                                 parallel_iterations=parallel_iterations,
                                 swap_memory=False)
    del inputs

    batch_feats = output_tas.stack()
    return batch_feats
Example #27
def splice_layer(x, name, context):
  '''
  Splice a tensor along the last dimension with context.
  e.g.:
  t = [[[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]]
  splice_layer(t, 'splice', [0, 1]) =
      [[[1, 2, 3, 4, 5, 6],
        [4, 5, 6, 7, 8, 9],
        [7, 8, 9, 7, 8, 9]]]

  Args:
    x: a tf.Tensor with shape (B, T, D) a.k.a. (N, H, W)
    context: a list of context offsets

  Returns:
    spliced tensor with shape (..., D * len(context))
  '''
  with tf.variable_scope(name):
    input_shape = tf.shape(x)
    B, T = input_shape[0], input_shape[1]
    context_len = len(context)
    array = tf.TensorArray(x.dtype, size=context_len)
    for idx, offset in enumerate(context):
      begin = offset
      end = T + offset
      if begin < 0:
        begin = 0
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, 0:1, :], [1, abs(offset), 1])
        final = tf.concat((tiled, sliced), axis=1)
      else:
        end = T
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, -1:, :], [1, abs(offset), 1])
        final = tf.concat((sliced, tiled), axis=1)
      array = array.write(idx, final)
    spliced = array.stack()
    spliced = tf.transpose(spliced, (1, 2, 0, 3))
    spliced = tf.reshape(spliced, (B, T, -1))
  return spliced
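
The docstring's own example as a runnable sketch:

import tensorflow as tf

t = tf.constant([[[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]]], dtype=tf.float32)
out = splice_layer(t, 'splice_demo', [0, 1])
# out[0] == [[1, 2, 3, 4, 5, 6],
#            [4, 5, 6, 7, 8, 9],
#            [7, 8, 9, 7, 8, 9]]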
Example #28
def _make_example(uttids, feats, ilens, targets, olens):
    features = {
        'uttids': uttids,
        'inputs': (tf.expand_dims(feats, axis=-1)
                   if not isinstance(feats, np.ndarray)
                   else np.expand_dims(feats, axis=-1)),
        'input_length': ilens,
        'targets': targets,
        'target_length': olens
    }
    # dummy data for the dummy loss function
    labels = {
        'ctc': (tf.ones(tf.shape(feats)[0])
                if not isinstance(feats, np.ndarray)
                else np.ones(feats.shape[0]))
    }
    return features, labels
Example #29
  def call(self, inputs: list, **kwargs) -> typing.Any:
    """
        The computation logic of DynamicPoolingLayer.
        :param inputs: two input tensors.
        """
    self._validate_dpool_size()
    x, dpool_index = inputs
    dpool_shape = tf.shape(dpool_index)
    batch_index_one = tf.expand_dims(
        tf.expand_dims(tf.range(dpool_shape[0]), axis=-1), axis=-1)
    batch_index = tf.expand_dims(
        tf.tile(batch_index_one, [1, self._msize1, self._msize2]), axis=-1)
    dpool_index_ex = tf.concat([batch_index, dpool_index], axis=3)
    x_expand = tf.gather_nd(x, dpool_index_ex)
    stride1 = self._msize1 // self._psize1
    stride2 = self._msize2 // self._psize2

    x_pool = tf.nn.max_pool(x_expand, [1, stride1, stride2, 1],
                            [1, stride1, stride2, 1], "VALID")
    return x_pool
Example #30
def transform_preprocess(labels=None, blank_index=None, num_class=None):
  ''' Ensure that the value of blank_index is in a reasonable range,
      and transform the DenseTensor labels to a SparseTensor '''
  if blank_index is None or blank_index < 0:
    raise ValueError('blank_index must be greater than or equal to zero')

  if num_class is not None and blank_index > (num_class - 1):
    raise ValueError('blank_index must be less than or equal to num_class - 1')

  if labels is None:
    return None

  if not isinstance(labels, tf.SparseTensor):
    labels = tf.cast(labels, tf.int32)
    labels_idx = tf.where(tf.not_equal(labels, 0))
    labels_values = tf.gather_nd(labels, labels_idx)
    labels_shape = tf.cast(tf.shape(labels), dtype=tf.int64)
    labels = tf.SparseTensor(
        indices=labels_idx, values=labels_values, dense_shape=labels_shape)

  return labels
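
A hypothetical usage sketch; zero entries in the dense labels are treated as padding and dropped from the resulting SparseTensor:

import tensorflow as tf

dense_labels = tf.constant([[3, 1, 0],
                            [2, 0, 0]])
sparse = transform_preprocess(labels=dense_labels, blank_index=4, num_class=5)
# sparse.values == [3, 1, 2]; sparse.dense_shape == [2, 3]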