Example #1
import re

import tensorflow as tf

import deepSpeech        # project-local model definition
import helper_routines   # project-local utilities (defines TOWER_NAME)

# ARGS holds the parsed command-line flags; it is defined at module level
# elsewhere in the repository.


def tower_loss(scope, feats, labels, seq_lens):
    """Calculate the total loss on a single tower running the deepSpeech model.

    This function builds the graph for computing the loss per tower (GPU).

    Args:
      scope: unique prefix string identifying the
             deepSpeech tower, e.g. 'tower_0'.
      feats: Tensor of shape BxFxT representing the
             audio features (MFCCs or spectrogram).
      labels: sparse tensor holding the labels of each utterance.
      seq_lens: tensor of shape [batch_size] holding
                the sequence length per input utterance.
    Returns:
      Scalar Tensor containing the total loss for a batch of data.
    """

    # Build inference Graph.
    logits = deepSpeech.inference(feats, seq_lens, ARGS)

    # Build the portion of the Graph calculating the losses. The total_loss
    # is assembled below by summing the 'losses' collection.
    _ = deepSpeech.loss(logits, labels, seq_lens)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss;
    # do the same for the averaged version of the losses.
    for loss in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a
        # multi-GPU training session. This helps the clarity
        # of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % helper_routines.TOWER_NAME, '',
                           loss.op.name)
        # Name each loss as '(raw)' and name the moving average
        # version of the loss as the original loss name.
        tf.summary.scalar(loss_name + '(raw)', loss)
        tf.summary.scalar(loss_name, loss_averages.average(loss))

    # Without this control dependency, loss_averages_op would never run.
    with tf.control_dependencies([loss_averages_op]):
        total_loss = tf.identity(total_loss)
    return total_loss
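A minimal sketch of how tower_loss slots into a multi-GPU training loop, assuming TensorFlow 1.x and CIFAR-10-style tower naming. TOWER_NAME, NUM_GPUS and build_tower_losses are illustrative names, and feats, labels and seq_lens would come from deepSpeech.inputs as in Example #3:

import tensorflow as tf

TOWER_NAME = 'tower'  # assumed to match helper_routines.TOWER_NAME
NUM_GPUS = 2          # hypothetical GPU count


def build_tower_losses(feats, labels, seq_lens):
    """Build one loss per GPU, sharing weights across towers."""
    tower_losses = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(NUM_GPUS):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                    tower_losses.append(
                        tower_loss(scope, feats, labels, seq_lens))
                    # Reuse variables so every tower shares the same weights.
                    tf.get_variable_scope().reuse_variables()
    return tower_losses

Each per-tower loss can then be fed to an optimizer's compute_gradients call, with the resulting gradients averaged across towers before being applied.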
Example #2
def tower_loss(sess, scope, feats, labels, seq_lens):
    """Calculate the total loss on a single tower running the deepSpeech model.

    This function builds the graph for computing the loss per tower (GPU).

    Args:
      sess: TensorFlow session used while building the inference graph.
      scope: unique prefix string identifying the
             deepSpeech tower, e.g. 'tower_0'.
      feats: Tensor of shape BxFxT representing the
             audio features (MFCCs or spectrogram).
      labels: sparse tensor holding the labels of each utterance.
      seq_lens: tensor of shape [batch_size] holding
                the sequence length per input utterance.
    Returns:
      Scalar Tensor containing the total loss for a batch of data.
    """

    # Build inference Graph.
    logits = deepSpeech.inference(sess, feats, seq_lens, ARGS)

    # Build the portion of the Graph calculating the loss. Unlike Example #1,
    # deepSpeech.loss here returns the total loss directly.
    total_loss = deepSpeech.loss(logits, labels, seq_lens)

    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    loss_averages_op = loss_averages.apply([total_loss])

    # Attach scalar summaries to the raw total loss and to its moving-average
    # version; the averaged loss keeps the original op name.
    loss_name = total_loss.op.name
    tf.summary.scalar(loss_name + '(raw)', total_loss)
    tf.summary.scalar(loss_name, loss_averages.average(total_loss))

    # Without this control dependency, loss_averages_op would never run.
    with tf.control_dependencies([loss_averages_op]):
        total_loss = tf.identity(total_loss)

    return total_loss
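The control-dependency trick at the end of both examples is worth isolating: fetching total_loss on its own would never execute loss_averages_op, so the op is attached as a dependency of a tf.identity. A self-contained TensorFlow 1.x illustration (raw_loss is an illustrative stand-in for the real loss tensor):

import tensorflow as tf

raw_loss = tf.Variable(4.0, name='raw_loss')
ema = tf.train.ExponentialMovingAverage(0.9, name='avg')
ema_op = ema.apply([raw_loss])

# tf.identity creates a new op whose execution is gated on ema_op, so every
# fetch of `loss` also updates the moving average as a side effect.
with tf.control_dependencies([ema_op]):
    loss = tf.identity(raw_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(loss)                           # also runs ema_op
    print(sess.run(ema.average(raw_loss)))   # shadow variable is up to date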
Example #3
def evaluate():
    """ Evaluate deepSpeech modelfor a number of steps."""

    with tf.Graph().as_default() as graph:
        # Get feats and labels for deepSpeech.
        feats, labels, seq_lens = deepSpeech.inputs(ARGS.eval_data,
                                                    data_dir=ARGS.data_dir,
                                                    batch_size=ARGS.batch_size,
                                                    use_fp16=ARGS.use_fp16,
                                                    shuffle=True)
        session = tf.Session()

        # Build the ops that compute the logits predictions from the
        # inference model.
        ARGS.keep_prob = 1.0  # Disable dropout during testing.
        logits = deepSpeech.inference(session, feats, seq_lens, ARGS)

        # Calculate predictions.
        output_log_prob = tf.nn.log_softmax(logits)
        decoder = tf.nn.ctc_greedy_decoder
        strided_seq_lens = deepSpeech.get_rnn_seqlen(seq_lens)
        predictions = decoder(output_log_prob, strided_seq_lens)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            ARGS.moving_avg_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(ARGS.eval_dir, graph)

        while True:
            eval_once(session, saver, summary_writer, predictions, summary_op,
                      labels)

            if ARGS.run_once:
                break
            time.sleep(ARGS.eval_interval_secs)
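eval_once is not shown on this page; the sketch below is a hypothetical minimal version in the style of the TensorFlow CIFAR-10 evaluation loop. ARGS.checkpoint_dir and the error-metric step are assumptions, not part of the original code:

import tensorflow as tf


def eval_once(sess, saver, summary_writer, predictions, summary_op, labels):
    """Hypothetical single evaluation pass (not from the original repo)."""
    ckpt = tf.train.get_checkpoint_state(ARGS.checkpoint_dir)  # assumed flag
    if not (ckpt and ckpt.model_checkpoint_path):
        print('No checkpoint file found')
        return
    saver.restore(sess, ckpt.model_checkpoint_path)
    # By convention the global step is embedded in the checkpoint filename,
    # e.g. .../model.ckpt-10000 -> 10000.
    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

    # The input pipeline is queue-based in TF 1.x, so queue runners must be
    # started before any batch can be fetched.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        decoded, label_vals = sess.run([predictions, labels])
        # ... compare decoded transcripts against label_vals (e.g. CER/WER) ...
        summary = tf.Summary()
        summary.ParseFromString(sess.run(summary_op))
        summary_writer.add_summary(summary, global_step)
    finally:
        coord.request_stop()
        coord.join(threads)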