Example #1
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the cross-entropy loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a list that contains a Bx1 tensor containing the targets
                for each time step, where B is the batch size
            logits: a list that contains a BxO tensor containing the output
                logits for each time step, where O is the output dimension
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss
        '''

        with tf.variable_scope('weight_loss'):
            trainable_weights = tf.trainable_variables()
            weight_loss = 0
            for trainable in trainable_weights:
                weight_loss += tf.nn.l2_loss(trainable)
            weight_loss = weight_loss / len(trainable_weights)

        with tf.name_scope('cross_entropy_loss'):

            #training starts at t=1.
            targets_t_one = targets[:, 1:, :]
            target_seq_length_t_one = target_seq_length - 1

            #convert to non sequential data
            nonseq_targets = seq_convertors.seq2nonseq(
                targets_t_one, target_seq_length_t_one)
            nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

            #make a vector out of the targets
            nonseq_targets = tf.reshape(nonseq_targets, [-1])

            #one hot encode the targets
            #pylint: disable=E1101
            nonseq_targets = tf.one_hot(nonseq_targets,
                                        int(nonseq_logits.get_shape()[1]))

            #compute the cross-entropy loss
            loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=nonseq_logits, labels=nonseq_targets))

            loss = loss + self.l2_cost_weight * weight_loss
        return loss
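All of these snippets flatten their padded sequential data with seq_convertors.seq2nonseq before computing the loss. That helper is not shown in this collection; the sketch below is a minimal assumption of what it might do, based only on how it is called above (a time-major list of BxD tensors plus a vector of sequence lengths, returning a single 2-D tensor with the padding frames removed). It is not the original implementation.

import tensorflow as tf

def seq2nonseq(sequential, seq_length):
    '''hypothetical sketch of seq_convertors.seq2nonseq, assumed from its usage

    Args:
        sequential: a time-major list of [batch_size, dim] tensors
        seq_length: a [batch_size] vector with the length of each sequence

    Returns:
        a [sum(seq_length), dim] tensor with the padding frames removed
    '''

    #stack the list into a [time, batch, dim] tensor and put the batch axis first
    tensor = tf.transpose(tf.stack(sequential), [1, 0, 2])

    #mask that is True for the first seq_length[i] frames of sequence i
    mask = tf.sequence_mask(seq_length, maxlen=len(sequential))

    #drop the padded frames
    return tf.boolean_mask(tensor, mask)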
Example #2
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the cross-entropy loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a list that contains a Bx1 tensor containing the targets
                for each time step, where B is the batch size
            logits: a list that contains a BxO tensor containing the output
                logits for each time step, where O is the output dimension
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss and a scalar value containing
            the number of correctly classified frames
        '''

        with tf.name_scope('cross_entropy_loss'):

            #convert to non sequential data
            nonseq_targets = seq_convertors.seq2nonseq(targets,
                                                       target_seq_length)
            nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

            #make a vector out of the targets
            nonseq_targets = tf.reshape(nonseq_targets, [-1])

            #one hot encode the targets
            #pylint: disable=E1101
            nonseq_targets = tf.one_hot(nonseq_targets,
                                        int(nonseq_logits.get_shape()[1]))

            # Evaluate model
            # argmax returns the index of the largest value
            correct_pred = tf.equal(tf.argmax(nonseq_logits, 1),
                                    tf.argmax(nonseq_targets, 1))
            true_count = tf.reduce_sum(tf.cast(correct_pred, tf.float32))

            # loss
            loss = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(logits=nonseq_logits,
                                                        labels=nonseq_targets))

            #return the loss and the number of correctly classified frames
            return loss, true_count
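Because this variant returns the number of correctly classified frames next to the loss, a frame-level accuracy can be derived from it. A minimal usage sketch follows; the names classifier, targets and logits, and the surrounding setup, are assumptions rather than part of the original code.

import tensorflow as tf

#assumed usage: compute_loss is called on some classifier object that exposes
#the method above, with the tensors described in its docstring
loss, true_count = classifier.compute_loss(
    targets, logits, logit_seq_length, target_seq_length)

#total number of real (unpadded) target frames in the batch
num_frames = tf.cast(tf.reduce_sum(target_seq_length), tf.float32)

#fraction of frames whose predicted class matches the target
frame_accuracy = true_count / num_frames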
Example #3
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the CTC loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a list that contains a Bx1 tensor containing the targets
                for each time step, where B is the batch size
            logits: a list that contains a BxO tensor containing the output
                logits for each time step, where O is the output dimension
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss
        '''

        #get the batch size
        batch_size = int(target_seq_length.get_shape()[0])

        #convert the targets into a sparse tensor representation
        indices = tf.concat([
            tf.concat([
                tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
                tf.expand_dims(tf.range(target_seq_length[s]), 1)
            ], 1) for s in range(batch_size)
        ], 0)
        values = tf.reshape(
            seq_convertors.seq2nonseq(targets, target_seq_length), [-1])
        shape = [batch_size, len(targets)]
        sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64), values,
                                         shape)

        #sum over the batch to get a scalar loss value
        return tf.reduce_sum(tf.nn.ctc_loss(sparse_targets, tf.stack(logits),
                                            logit_seq_length))
Example #4
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the cross-entropy loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a list that contains a Bx1 tensor containing the targets
                for each time step, where B is the batch size
            logits: a list that contains a BxO tensor containing the output
                logits for each time step, where O is the output dimension
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss
        '''

        with tf.name_scope('cross_entropy_loss'):

            #convert to non sequential data
            nonseq_targets = seq_convertors.seq2nonseq(targets,
                                                       target_seq_length)
            nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

            #make a vector out of the targets
            nonseq_targets = tf.reshape(nonseq_targets, [-1])

            #one hot encode the targets
            #pylint: disable=E1101
            nonseq_targets = tf.one_hot(nonseq_targets,
                                        int(nonseq_logits.get_shape()[1]))

            #compute the cross-entropy loss
            return tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
                logits=nonseq_logits, labels=nonseq_targets))
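The tf.one_hot step used in these cross-entropy variants turns each integer target into a length-O indicator vector so it can be compared with the logits. A tiny illustration with made-up values:

import tensorflow as tf

#three frames with integer targets 2, 0 and 1, and an output dimension of 4
targets = tf.constant([2, 0, 1])
one_hot = tf.one_hot(targets, 4)
#one_hot evaluates to
#[[0., 0., 1., 0.],
# [1., 0., 0., 0.],
# [0., 1., 0., 0.]]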
Example #5
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the CTC loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a [batch_size, max_target_length, 1] tensor containing the
                targets
            logits: a [batch_size, max_input_length, dim] tensor containing the
                inputs
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss
        '''

        with tf.name_scope('CTC_loss'):

            #get the batch size
            batch_size = int(targets.get_shape()[0])

            #convert the targets into a sparse tensor representation
            indices = tf.concat([
                tf.concat([
                    tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
                    tf.expand_dims(tf.range(target_seq_length[s]), 1)
                ], 1) for s in range(batch_size)
            ], 0)

            values = tf.reshape(
                seq_convertors.seq2nonseq(targets, target_seq_length), [-1])

            shape = [batch_size, int(targets.get_shape()[1])]

            sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64),
                                             values, shape)

            loss = tf.reduce_sum(
                tf.nn.ctc_loss(sparse_targets,
                               logits,
                               logit_seq_length,
                               time_major=False))

        return loss
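To make the sparse construction above concrete, here is a small worked example with made-up numbers: a batch of two target sequences, [3, 7, 7] and [1, 2], padded to a maximum length of 3 (so target_seq_length is [3, 2]). The indices/values/shape triple fed to tf.SparseTensor would be:

import tensorflow as tf

#indices: one [sequence, position] pair for every real (unpadded) label
#values:  the labels themselves, in the same order
#shape:   [batch_size, max_target_length]
sparse_targets = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
    values=[3, 7, 7, 1, 2],
    dense_shape=[2, 3])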
Example #6
    def get_outputs(self, logits, logits_seq_length):
        '''
        Put the classifier output logits through a softmax

        Args:
            logits: A list containing a 1xO tensor for each timestep where O
                is the classifier output dimension
            logits_seq_length: the logits sequence length
        Returns:
            An NxO tensor containing posterior distributions
        '''

        #convert logits to non sequence for the softmax computation
        logits = seq_convertors.seq2nonseq(logits, logits_seq_length)

        return tf.nn.softmax(logits)
Example #7
    def __init__(self, classifier, input_dim, max_length):
        '''
        NnetDecoder constructor, creates the decoding graph

        Args:
            classifier: the classifier that will be used for decoding
            input_dim: the input dimension to the nnetgraph
            max_length: the maximum length of the input sequences
        '''

        self.graph = tf.Graph()
        self.max_length = max_length

        with self.graph.as_default():

            #create the inputs placeholder
            self.inputs = tf.placeholder(tf.float32,
                                         shape=[max_length, input_dim],
                                         name='inputs')

            #create the sequence length placeholder
            self.seq_length = tf.placeholder(tf.int32,
                                             shape=[1],
                                             name='seq_length')

            split_inputs = tf.unstack(tf.expand_dims(self.inputs, 1))

            #create the decoding graph
            logits, _, self.saver, _ = classifier(split_inputs,
                                                  self.seq_length,
                                                  is_training=False,
                                                  reuse=False,
                                                  scope='Classifier')

            #convert logits to non sequence for the softmax computation
            logits = seq_convertors.seq2nonseq(logits, self.seq_length)

            #compute the outputs
            self.outputs = tf.nn.softmax(logits)

            # merge all summary during the decoding
            self.merged = tf.summary.merge_all()
            self.summarywriter = tf.summary.FileWriter(
                logdir="tf-exp/decode_vis", graph=self.graph)
            self.decode_visualisation = False

        #specify that the graph can no longer be modified after this point
        self.graph.finalize()
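Once the graph is finalized, decoding a single utterance amounts to feeding a padded feature matrix and its true length into the two placeholders. The sketch below shows one assumed way to do that; the names decoder and features, and the checkpoint path, are illustrative and not part of the original code.

import numpy as np
import tensorflow as tf

#assumed: decoder is an NnetDecoder instance and features is a
#[num_frames, input_dim] numpy array with num_frames <= decoder.max_length
padded = np.zeros([decoder.max_length, features.shape[1]], dtype=np.float32)
padded[:features.shape[0]] = features

with tf.Session(graph=decoder.graph) as sess:
    #restore the classifier weights (the checkpoint path is a placeholder)
    decoder.saver.restore(sess, 'path/to/checkpoint')

    #seq2nonseq removes the padding inside the graph, so posteriors holds one
    #row of class probabilities per real frame
    posteriors = sess.run(decoder.outputs,
                          feed_dict={decoder.inputs: padded,
                                     decoder.seq_length: [features.shape[0]]})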
Example #8
    def get_outputs(self, logits, logits_seq_length):
        '''
        Put the classifier output logits through a softmax

        Args:
            logits: A list containing a 1xO tensor for each timestep where O
                is the classifier output dimension
            logits_seq_length: the logits sequence length
        Returns:
            An NxO tensor containing posterior distributions
        '''

        # convert logits to non sequence for the softmax computation
        logits = seq_convertors.seq2nonseq(logits, logits_seq_length)
        '''
        softmax(logits, dim=-1, name=None):
        Computes softmax activations.
        For each batch `i` and class `j` we have
        softmax = exp(logits) / reduce_sum(exp(logits), dim)
        '''
        return tf.nn.softmax(logits)
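As a quick numeric check of the formula quoted in the comment above (the values are made up): for one frame with logits [2.0, 1.0, 0.1], exponentiating and normalising gives the posterior distribution.

import numpy as np

logits = np.array([2.0, 1.0, 0.1])
exp = np.exp(logits)
posterior = exp / exp.sum()
#posterior is approximately [0.659, 0.242, 0.099] and sums to 1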
Example #9
    def compute_loss(self, targets, logits, logit_seq_length,
                     target_seq_length):
        '''
        Compute the loss

        Creates the operation to compute the CTC loss for every input
        frame (if you want a different loss function, override this
        method)

        Args:
            targets: a list that contains a Bx1 tensor containing the targets
                for each time step, where B is the batch size
            logits: a list that contains a BxO tensor containing the output
                logits for each time step, where O is the output dimension
            logit_seq_length: the length of all the input sequences as a vector
            target_seq_length: the length of all the target sequences as a
                vector

        Returns:
            a scalar value containing the loss
        '''

        #get the batch size
        batch_size = int(target_seq_length.get_shape()[0])

        #convert the targets into a sparse tensor representation
        indices = tf.concat([
            tf.concat([
                tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
                tf.expand_dims(tf.range(target_seq_length[s]), 1)
            ], 1) for s in range(batch_size)
        ], 0)
        values = tf.reshape(
            seq_convertors.seq2nonseq(targets, target_seq_length), [-1])
        shape = [batch_size, len(targets)]
        sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64), values,
                                         shape)

        #sum over the batch to get a scalar loss value
        return tf.reduce_sum(tf.nn.ctc_loss(sparse_targets, tf.stack(logits),
                                            logit_seq_length))
Example #10
    def __init__(self, classifier, input_dim, max_length):
        '''
        NnetDecoder constructor, creates the decoding graph

        Args:
            classifier: the classifier that will be used for decoding
            input_dim: the input dimension to the nnetgraph
            max_length: the maximum length of the input sequences
        '''

        self.graph = tf.Graph()
        self.max_length = max_length

        with self.graph.as_default():

            #create the inputs placeholder
            self.inputs = tf.placeholder(
                tf.float32, shape=[max_length, input_dim], name='inputs')

            #create the sequence length placeholder
            self.seq_length = tf.placeholder(
                tf.int32, shape=[1], name='seq_length')

            split_inputs = tf.unstack(tf.expand_dims(self.inputs, 1))

            #create the decoding graph
            logits, _, self.saver, _ = classifier(split_inputs, self.seq_length,
                                                  is_training=False, reuse=False,
                                                  scope='Classifier')

            #convert logits to non sequence for the softmax computation
            logits = seq_convertors.seq2nonseq(logits, self.seq_length)

            #compute the outputs
            self.outputs = tf.nn.softmax(logits)

        #specify that the graph can no longer be modified after this point
        self.graph.finalize()