Example #1
    def evaluate(self, outputs, references, reference_seq_length):
        '''evaluate the output of the decoder

        Args:
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            the error of the outputs
        '''

        #stack the output sequences into a single non-sequential tensor
        stacked_outputs = {
            t:ops.seq2nonseq(outputs[t][0], outputs[t][1])
            for t in outputs}

        #create the stacked targets
        stacked_targets = {
            t:tf.cast(ops.seq2nonseq(references[t],
                                     reference_seq_length[t]), tf.int32)
            for t in references}

        #compute the per-label error rate (a position-wise comparison,
        #not an aligned edit distance)
        losses = [
            tf.reduce_mean(tf.cast(tf.not_equal(
                stacked_outputs[o], stacked_targets[o]), tf.float32))
            for o in outputs]

        loss = tf.reduce_mean(losses)

        return loss
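
All of the examples on this page lean on ops.seq2nonseq, which is not shown here. A minimal sketch of what that helper is assumed to do, flattening a padded [batch_size x time x ...] tensor into a [sum(seq_length) x ...] tensor by dropping the padded timesteps (TensorFlow 1.x):

import tensorflow as tf

def seq2nonseq(sequential, seq_length, name=None):
    '''flatten a padded batch of sequences into a non-sequential tensor

    keeps only the first seq_length[i] timesteps of every sequence i, so
    the result has shape [sum(seq_length) x ...]'''

    with tf.name_scope(name or 'seq2nonseq'):
        #boolean mask of the valid (non-padded) timesteps: [batch_size x time]
        mask = tf.sequence_mask(seq_length, maxlen=tf.shape(sequential)[1])

        #keep only the valid timesteps, concatenated across the batch
        return tf.boolean_mask(sequential, mask)
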
Example #2
    def update_evaluation_loss(self, loss, outputs, references,
                               reference_seq_length):
        '''update the evaluation loss

        Args:
            loss: the current evaluation loss
            outputs: the outputs of the decoder as a dictionary
            references: the references as a dictionary
            reference_seq_length: the sequence lengths of the references

        Returns:
            an op to update the evaluation loss
        '''

        #create a variable to hold the total number of reference targets
        num_targets = tf.get_variable(
            name='num_targets',
            shape=[],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
            trainable=False
        )

        #stack the output sequences into a single non-sequential tensor
        stacked_outputs = {
            t:ops.seq2nonseq(outputs[t][0], outputs[t][1])
            for t in outputs}

        #create the stacked targets
        stacked_targets = {
            t:tf.cast(ops.seq2nonseq(references[t],
                                     reference_seq_length[t]), tf.int32)
            for t in references}

        #compute the number of errors
        errors = [
            tf.reduce_sum(tf.cast(tf.not_equal(
                stacked_outputs[o], stacked_targets[o]), tf.float32))
            for o in outputs]

        errors = tf.reduce_sum(errors)

        #compute the number of targets in this batch
        batch_targets = tf.reduce_sum([
            tf.reduce_sum(lengths)
            for lengths in reference_seq_length.values()])

        new_num_targets = num_targets + tf.cast(batch_targets, tf.float32)

        #an operation to update the loss
        update_loss = loss.assign(
            (loss*num_targets + errors)/new_num_targets).op

        #add an operation to update the number of targets
        with tf.control_dependencies([update_loss]):
            update_loss = num_targets.assign(new_num_targets).op

        return update_loss
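
A hedged sketch of how this streaming update is meant to be used: create the loss variable once, run the returned op on every validation batch, and the variable converges to the overall label error rate. Everything here besides the TensorFlow calls (evaluator, outputs, references, reference_seq_length, num_batches) is a hypothetical placeholder:

#hypothetical evaluation loop (TensorFlow 1.x)
loss = tf.get_variable(
    name='eval_loss',
    shape=[],
    dtype=tf.float32,
    initializer=tf.zeros_initializer(),
    trainable=False)

update_op = evaluator.update_evaluation_loss(
    loss, outputs, references, reference_seq_length)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_batches):
        sess.run(update_op)
    print('label error rate: %f' % sess.run(loss))
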
Example #3
def cross_entropy_eos(targets, logits, logit_seq_length, target_seq_length):
    '''
    cross entropy loss with an end of sequence label added

    Args:
        targets: a dictionary of [batch_size x time x ...] tensor containing
            the targets
        logits: a dictionary of [batch_size x time x ...] tensor containing
            the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('cross_entropy_loss'):
        losses = []
        #list(...) so this also works on python 3 dict views
        batch_size = tf.shape(list(targets.values())[0])[0]

        for t in targets:
            with tf.name_scope('cross_entropy_loss'):

                output_dim = tf.shape(logits[t])[2]

                #get the logits for the final timestep
                indices = tf.stack([tf.range(batch_size),
                                    logit_seq_length[t] - 1],
                                   axis=1)
                final_logits = tf.gather_nd(logits[t], indices)

                #stack all the logits except the final logits
                stacked_logits = ops.seq2nonseq(logits[t],
                                                logit_seq_length[t] - 1)

                #create the stacked targets, cast to int32 so they can be
                #concatenated with the int32 end of sequence targets
                stacked_targets = tf.cast(ops.seq2nonseq(
                    targets[t], target_seq_length[t]), tf.int32)

                #create the targets for the end of sequence labels
                final_targets = tf.tile([output_dim-1], [batch_size])

                #add the final logits and targets
                stacked_logits = tf.concat([stacked_logits, final_logits], 0)
                stacked_targets = tf.concat([stacked_targets, final_targets], 0)

                #compute the cross-entropy loss
                losses.append(tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=stacked_logits,
                        labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
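
The gather_nd step above picks out, per utterance, the logits at its last valid timestep; those are then trained toward the end of sequence label output_dim - 1. A toy check of that indexing trick, with made-up values:

import tensorflow as tf

#two sequences of lengths 2 and 3, four output classes
logits = tf.reshape(tf.range(24, dtype=tf.float32), [2, 3, 4])
logit_seq_length = tf.constant([2, 3])

#(batch index, last valid timestep) pairs: [[0, 1], [1, 2]]
indices = tf.stack([tf.range(2), logit_seq_length - 1], axis=1)

final_logits = tf.gather_nd(logits, indices)

with tf.Session() as sess:
    #rows logits[0, 1] and logits[1, 2]:
    #[[ 4.  5.  6.  7.], [20. 21. 22. 23.]]
    print(sess.run(final_logits))
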
Example #4
def marigin_loss(targets, logits, logit_seq_length, target_seq_length):
    '''
    margin loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensor containing
            the targets
        logits: a dictionary of [batch_size x time x ...] tensor containing
            the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('marigin_loss'):
        losses = []

        for t in targets:
            #stack the logits
            stacked_logits = tf.squeeze(
                ops.seq2nonseq(logits[t], logit_seq_length[t]), [1])
            stacked_probs = tf.nn.sigmoid(stacked_logits)

            #create the stacked targets
            stacked_targets = tf.to_float(
                ops.seq2nonseq(targets[t], target_seq_length[t]))

            #compute the lower and upper margins
            lower = tf.square(tf.maximum(0.0, stacked_probs - 0.1))
            upper = tf.square(tf.maximum(0.0, 0.9 - stacked_probs))

            #compute the loss
            losses.append(
                tf.reduce_mean(stacked_targets * upper +
                               (1 - stacked_targets) * lower))

        loss = tf.reduce_sum(losses)

    return loss
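
The loss is zero whenever an active target gets a probability above 0.9 and an inactive one stays below 0.1; outside those margins the penalty grows quadratically. A toy check with made-up probabilities:

import tensorflow as tf

probs = tf.constant([0.95, 0.5, 0.05])
targets = tf.constant([1.0, 1.0, 0.0])

lower = tf.square(tf.maximum(0.0, probs - 0.1))
upper = tf.square(tf.maximum(0.0, 0.9 - probs))
loss = tf.reduce_mean(targets * upper + (1 - targets) * lower)

with tf.Session() as sess:
    #the confident predictions (0.95 for an active target, 0.05 for an
    #inactive one) cost nothing; the uncertain 0.5 costs (0.9 - 0.5)**2,
    #so the mean is 0.16 / 3
    print(sess.run(loss))
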
Example #5
def cross_entropy(targets, logits, logit_seq_length, target_seq_length):
    '''
    cross entropy loss

    Args:
        targets: a dictionary of [batch_size x time x ...] tensor containing
            the targets
        logits: a dictionary of [batch_size x time x ...] tensor containing
            the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('cross_entropy_loss'):
        losses = []

        for t in targets:
            #stack the logits
            stacked_logits = ops.seq2nonseq(logits[t], logit_seq_length[t])

            #create the stacked targets
            stacked_targets = ops.seq2nonseq(targets[t],
                                             target_seq_length[t])
            stacked_targets = tf.cast(stacked_targets, tf.int32)

            #compute the cross-entropy loss
            losses.append(tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=stacked_logits,
                    labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
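
A hypothetical call with a single 'text' target stream of two padded sequences; the tensor values are made up:

import tensorflow as tf

#batch of 2, max length 3, 5 output classes
targets = {'text': tf.constant([[1, 2, 0], [3, 0, 0]])}
logits = {'text': tf.random_normal([2, 3, 5])}
seq_length = {'text': tf.constant([2, 1])}

#only the first 2 resp. 1 timesteps count; the padding is dropped
loss = cross_entropy(targets, logits, seq_length, seq_length)
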
Example #6
def sigmoid_cross_entropy(targets, logits, logit_seq_length, target_seq_length):
    '''
    Sigmoid cross entropy

    Args:
        targets: a dictionary of [batch_size x time x ...] tensor containing
            the targets
        logits: a dictionary of [batch_size x time x ...] tensor containing
            the logits
        logit_seq_length: a dictionary of [batch_size] vectors containing
            the logit sequence lengths
        target_seq_length: a dictionary of [batch_size] vectors containing
            the target sequence lengths

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('sigmoid_cross_entropy_loss'):
        losses = []

        for t in targets:
            #stack the logits
            stacked_logits = ops.seq2nonseq(logits[t], logit_seq_length[t])

            #create the stacked targets
            stacked_targets = ops.seq2nonseq(
                tf.cast(targets[t], tf.float32),
                target_seq_length[t])

            #compute the sigmoid cross-entropy loss
            losses.append(tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=stacked_logits,
                    labels=stacked_targets)))

        loss = tf.reduce_sum(losses)

    return loss
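
Unlike the sparse softmax loss above, this variant treats every output dimension as an independent binary decision, so the targets are per-timestep multi-hot vectors rather than class indices. A hypothetical call with made-up values:

import tensorflow as tf

#1 sequence, 2 valid timesteps out of 3, 3 independent binary labels
targets = {'events': tf.constant(
    [[[1, 0, 1], [0, 1, 0], [0, 0, 0]]], tf.float32)}
logits = {'events': tf.random_normal([1, 3, 3])}
seq_length = {'events': tf.constant([2])}

loss = sigmoid_cross_entropy(targets, logits, seq_length, seq_length)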