Python DeepSpeech2示例

编程语言: Python

命名空间/包名称: deep_speech_model

方法/功能: DeepSpeech2

hotexamples.com的示例: 2

Python DeepSpeech2 - 已找到2个示例。这些是从开源项目中提取的、最受好评的 deep_speech_model.DeepSpeech2 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： deep_speech.py 项目： xiangj1/E2E

def model_fn(features, labels, mode, params):
    """Build the Estimator spec for the DeepSpeech2 model.

    Args:
        features: dict of inputs; read here under the keys "input_length",
            "label_length" and "features".
        labels: labels for the batch, passed to the CTC loss as targets.
        mode: current estimator mode. Only
            `tf.estimator.ModeKeys.PREDICT` is special-cased; every other
            mode follows the training path.
        params: dict of hyper parameters; only "num_classes" is read here.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying predictions in PREDICT mode,
        otherwise one carrying the loss and the train op.
    """
    num_classes = params["num_classes"]
    input_length = features["input_length"]
    label_length = features["label_length"]
    inputs = features["features"]

    # The remaining hyper parameters come from the module-level flags object.
    network = deep_speech_model.DeepSpeech2(
        flags_obj.rnn_hidden_layers,
        flags_obj.rnn_type,
        flags_obj.is_bidirectional,
        flags_obj.rnn_hidden_size,
        num_classes,
        flags_obj.use_bias,
    )

    if mode == tf.estimator.ModeKeys.PREDICT:
        outputs = network(inputs, training=False)
        # NOTE(review): "probabilities" carries the raw model output, same as
        # "logits" -- confirm the model already normalizes its output.
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                "classes": tf.argmax(outputs, axis=2),
                "probabilities": outputs,
                "logits": outputs,
            },
        )

    # Any non-PREDICT mode: run the network with training=True.
    outputs = network(inputs, training=True)
    input_dim1 = tf.shape(inputs)[1]
    output_dim1 = tf.shape(outputs)[1]
    ctc_input_length = compute_length_after_conv(
        input_dim1, output_dim1, input_length)

    # CTC loss, averaged over the batch.
    # NOTE(review): ctc_batch_cost documents y_pred as softmax output --
    # confirm the model output is already a probability distribution.
    per_example_cost = tf.keras.backend.ctc_batch_cost(
        labels, outputs, ctc_input_length, label_length)
    loss = tf.reduce_mean(per_example_cost)

    adam = tf.compat.v1.train.AdamOptimizer(
        learning_rate=flags_obj.learning_rate)
    step = tf.compat.v1.train.get_or_create_global_step()
    minimize_op = adam.minimize(loss, global_step=step)
    pending_updates = tf.compat.v1.get_collection(
        tf.compat.v1.GraphKeys.UPDATE_OPS)
    # The train op runs the optimizer step together with the collected
    # UPDATE_OPS.
    train_op = tf.group(minimize_op, pending_updates)

    # NOTE(review): this writes 'myModel.h5' every time the training path of
    # model_fn is built -- confirm that is intended.
    print('Saving Keras Model')
    keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    keras_model.save('myModel.h5')

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

示例#2

显示文件

def model_fn(features, labels, mode, params):
    """Define the model function for the deep speech model (TPU estimator).

    Args:
        features: a dictionary of input data features. It is read under the
            keys "input_length", "label_length" and "features".
        labels: labels for the input data, used as CTC targets.
        mode: current estimator mode. Only
            `tf.estimator.ModeKeys.PREDICT` is special-cased; every other
            mode follows the training path.
        params: a dict of hyper parameters passed to model_fn; only
            "num_classes" is read here.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` carrying predictions in PREDICT
        mode, otherwise one carrying the loss and the train op.
    """
    num_classes = params["num_classes"]
    # Both lengths are needed below for the CTC loss, so they must be read
    # from the feature dict (they were previously commented out, which left
    # the names undefined).
    input_length = features["input_length"]
    label_length = features["label_length"]
    features = features["features"]

    # Create DeepSpeech2 model.
    model = deep_speech_model.DeepSpeech2(
        flags_obj.rnn_hidden_layers,
        flags_obj.rnn_type,
        flags_obj.is_bidirectional,
        flags_obj.rnn_hidden_size,
        num_classes,
        flags_obj.use_bias,
    )

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits = model(features, training=False)
        predictions = {
            "classes": tf.argmax(logits, axis=2),
            "probabilities": tf.nn.softmax(logits),
            "logits": logits,
        }
        return tf.contrib.tpu.TPUEstimatorSpec(mode, predictions=predictions)

    # In training mode.
    logits = model(features, training=True)
    probs = tf.nn.softmax(logits)
    ctc_input_length = compute_length_after_conv(
        tf.shape(features)[1], tf.shape(probs)[1], input_length
    )
    # Compute CTC loss, averaged over the batch.
    loss = tf.reduce_mean(
        ctc_loss(label_length, ctc_input_length, labels, probs)
    )

    optimizer = tf.train.AdamOptimizer(learning_rate=flags_obj.learning_rate)
    if flags_obj.use_tpu:
        # On TPU the optimizer is wrapped in CrossShardOptimizer.
        optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
    global_step = tf.train.get_or_create_global_step()
    minimize_op = optimizer.minimize(loss, global_step=global_step)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Create the train_op that groups both minimize_ops and update_ops.
    train_op = tf.group(minimize_op, update_ops)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, train_op=train_op
    )