def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the cross-entropy loss for every input
    frame plus an L2 penalty on the trainable weights (if you want to have a
    different loss function, overwrite this method)

    Args:
        targets: a [batch_size, max_target_length, 1] tensor containing the
            targets
        logits: a list that contains a BxO tensor containing the output
            logits for each time step where O is the output dimension
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss
    '''

    with tf.variable_scope('weight_loss'):
        trainable_weights = tf.trainable_variables()
        weight_loss = 0
        for trainable in trainable_weights:
            weight_loss += tf.nn.l2_loss(trainable)
        weight_loss = weight_loss / len(trainable_weights)

    with tf.name_scope('cross_entropy_loss'):

        #training starts at t=1, so drop the first target of every sequence
        targets_t_one = targets[:, 1:, :]
        target_seq_length_t_one = target_seq_length - 1

        #convert to non sequential data
        nonseq_targets = seq_convertors.seq2nonseq(
            targets_t_one, target_seq_length_t_one)
        nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

        #make a vector out of the targets
        nonseq_targets = tf.reshape(nonseq_targets, [-1])

        #one hot encode the targets
        #pylint: disable=E1101
        nonseq_targets = tf.one_hot(nonseq_targets,
                                    int(nonseq_logits.get_shape()[1]))

        #compute the cross-entropy loss
        loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=nonseq_logits, labels=nonseq_targets))

        #add the weighted L2 penalty
        loss = loss + self.l2_cost_weight * weight_loss

    return loss
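# Illustration only, not part of the class above: the weight_loss term is the
# average of tf.nn.l2_loss over all trainable variables, i.e. the mean of
# sum(w**2)/2 per variable, scaled by self.l2_cost_weight before being added
# to the cross-entropy term. A minimal NumPy sketch of that regulariser
# (l2_weight_loss is a hypothetical helper used only for this example):
import numpy as np

def l2_weight_loss(weights):
    '''weights: list of NumPy arrays, one per trainable variable'''
    return np.mean([np.sum(w ** 2) / 2.0 for w in weights])

example_weights = [np.ones((2, 3)), np.full((4,), 2.0)]
#per-variable l2_loss: 6 * 1**2 / 2 = 3.0 and 4 * 2**2 / 2 = 8.0
print(l2_weight_loss(example_weights))  # (3.0 + 8.0) / 2 = 5.5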
def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the cross-entropy loss for every input
    frame (if you want to have a different loss function, overwrite this
    method)

    Args:
        targets: a list that contains a Bx1 tensor containing the targets
            for each time step where B is the batch size
        logits: a list that contains a BxO tensor containing the output
            logits for each time step where O is the output dimension
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss and a scalar value containing the
        number of correctly classified frames
    '''

    with tf.name_scope('cross_entropy_loss'):

        #convert to non sequential data
        nonseq_targets = seq_convertors.seq2nonseq(targets,
                                                   target_seq_length)
        nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

        #make a vector out of the targets
        nonseq_targets = tf.reshape(nonseq_targets, [-1])

        #one hot encode the targets
        #pylint: disable=E1101
        nonseq_targets = tf.one_hot(nonseq_targets,
                                    int(nonseq_logits.get_shape()[1]))

        #evaluate the model: argmax returns the index of the maximum value
        correct_pred = tf.equal(tf.argmax(nonseq_logits, 1),
                                tf.argmax(nonseq_targets, 1))
        true_count = tf.reduce_sum(tf.cast(correct_pred, tf.float32))

        #compute the cross-entropy loss
        loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=nonseq_logits,
                                                    labels=nonseq_targets))

    return loss, true_count
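# Illustration only: after seq2nonseq the targets and logits are flat
# [num_frames] and [num_frames, num_classes] arrays, so the loss above is a
# plain frame-level softmax cross-entropy summed over frames, and true_count
# is the number of frames whose argmax matches the target. A NumPy sketch of
# the same computation (frame_loss_and_true_count is a hypothetical helper):
import numpy as np

def frame_loss_and_true_count(nonseq_logits, nonseq_targets):
    '''nonseq_logits: [N, O] float array, nonseq_targets: [N] int array'''
    #softmax over the class dimension, stabilised with the max trick
    shifted = nonseq_logits - nonseq_logits.max(axis=1, keepdims=True)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    #summed cross-entropy of the correct classes (equivalent to one-hot
    #targets with softmax_cross_entropy_with_logits)
    loss = -np.sum(np.log(probs[np.arange(len(nonseq_targets)),
                                nonseq_targets]))
    #number of correctly classified frames
    true_count = np.sum(nonseq_logits.argmax(axis=1) == nonseq_targets)
    return loss, true_count

logits = np.array([[2.0, 0.5, 0.1], [0.1, 0.2, 3.0]])
targets = np.array([0, 1])
print(frame_loss_and_true_count(logits, targets))  # one of two frames correct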
def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the CTC loss for every input frame
    (if you want to have a different loss function, overwrite this method)

    Args:
        targets: a list that contains a Bx1 tensor containing the targets
            for each time step where B is the batch size
        logits: a list that contains a BxO tensor containing the output
            logits for each time step where O is the output dimension
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss
    '''

    #get the batch size
    batch_size = int(target_seq_length.get_shape()[0])

    #convert the targets into a sparse tensor representation: one
    #(utterance index, time index) pair per non-padded target label
    indices = tf.concat(0, [
        tf.concat(1, [
            tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
            tf.expand_dims(tf.range(target_seq_length[s]), 1)])
        for s in range(batch_size)])
    values = tf.reshape(
        seq_convertors.seq2nonseq(targets, target_seq_length), [-1])
    shape = [batch_size, len(targets)]
    sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64), values,
                                     shape)

    #compute the CTC loss, summed over the utterances in the batch
    loss = tf.reduce_sum(tf.nn.ctc_loss(tf.pack(logits), sparse_targets,
                                        logit_seq_length))

    return loss
def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the cross-entropy loss for every input
    frame (if you want to have a different loss function, overwrite this
    method)

    Args:
        targets: a list that contains a Bx1 tensor containing the targets
            for each time step where B is the batch size
        logits: a list that contains a BxO tensor containing the output
            logits for each time step where O is the output dimension
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('cross_entropy_loss'):

        #convert to non sequential data
        nonseq_targets = seq_convertors.seq2nonseq(targets,
                                                   target_seq_length)
        nonseq_logits = seq_convertors.seq2nonseq(logits, logit_seq_length)

        #make a vector out of the targets
        nonseq_targets = tf.reshape(nonseq_targets, [-1])

        #one hot encode the targets
        #pylint: disable=E1101
        nonseq_targets = tf.one_hot(nonseq_targets,
                                    int(nonseq_logits.get_shape()[1]))

        #compute the cross-entropy loss
        return tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
            logits=nonseq_logits, labels=nonseq_targets))
def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the CTC loss for every input frame
    (if you want to have a different loss function, overwrite this method)

    Args:
        targets: a [batch_size, max_target_length, 1] tensor containing the
            targets
        logits: a [batch_size, max_input_length, dim] tensor containing the
            inputs
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss
    '''

    with tf.name_scope('CTC_loss'):

        #get the batch size
        batch_size = int(targets.get_shape()[0])

        #convert the targets into a sparse tensor representation
        indices = tf.concat([
            tf.concat([
                tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
                tf.expand_dims(tf.range(target_seq_length[s]), 1)
            ], 1)
            for s in range(batch_size)
        ], 0)
        values = tf.reshape(
            seq_convertors.seq2nonseq(targets, target_seq_length), [-1])
        shape = [batch_size, int(targets.get_shape()[1])]
        sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64), values,
                                         shape)

        loss = tf.reduce_sum(tf.nn.ctc_loss(sparse_targets, logits,
                                            logit_seq_length,
                                            time_major=False))

    return loss
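# Illustration only: the list comprehension above builds, for every utterance
# s in the batch, the (batch index, time index) pairs of its non-padded
# labels; together with the flattened label values and the dense shape this
# is exactly what tf.SparseTensor needs for tf.nn.ctc_loss. A NumPy sketch
# with 2-D dense targets for simplicity (dense_to_sparse is a hypothetical
# helper, padding values beyond target_seq_length are ignored):
import numpy as np

def dense_to_sparse(targets, target_seq_length):
    '''targets: [batch_size, max_target_length] int array'''
    batch_size, max_target_length = targets.shape
    #one (batch index, time index) row per non-padded label
    indices = np.concatenate([
        np.stack([np.full(target_seq_length[s], s),
                  np.arange(target_seq_length[s])], axis=1)
        for s in range(batch_size)], axis=0)
    #the label values in the same order as the indices
    values = np.concatenate(
        [targets[s, :target_seq_length[s]] for s in range(batch_size)])
    shape = [batch_size, max_target_length]
    return indices, values, shape

targets = np.array([[3, 1, 4, 0], [2, 7, 0, 0]])
lengths = np.array([3, 2])
print(dense_to_sparse(targets, lengths))
# indices: [[0,0],[0,1],[0,2],[1,0],[1,1]], values: [3,1,4,2,7], shape: [2,4]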
def get_outputs(self, logits, logits_seq_length):
    '''
    Put the classifier output logits through a softmax

    Args:
        logits: A list containing a 1xO tensor for each timestep where O is
            the classifier output dimension
        logits_seq_length: the logits sequence length

    Returns:
        An NxO tensor containing posterior distributions
    '''

    #convert logits to non sequential data for the softmax computation
    logits = seq_convertors.seq2nonseq(logits, logits_seq_length)

    return tf.nn.softmax(logits)
def __init__(self, classifier, input_dim, max_length):
    '''
    NnetDecoder constructor, creates the decoding graph

    Args:
        classifier: the classifier that will be used for decoding
        input_dim: the input dimension to the nnetgraph
        max_length: the maximum length (in frames) of the input sequences
    '''

    self.graph = tf.Graph()
    self.max_length = max_length

    with self.graph.as_default():

        #create the inputs placeholder
        self.inputs = tf.placeholder(
            tf.float32, shape=[max_length, input_dim], name='inputs')

        #create the sequence length placeholder
        self.seq_length = tf.placeholder(
            tf.int32, shape=[1], name='seq_length')

        #split the inputs into a list of time steps with batch size 1
        split_inputs = tf.unstack(tf.expand_dims(self.inputs, 1))

        #create the decoding graph
        logits, _, self.saver, _ = classifier(split_inputs, self.seq_length,
                                              is_training=False,
                                              reuse=False,
                                              scope='Classifier')

        #convert logits to non sequential data for the softmax computation
        logits = seq_convertors.seq2nonseq(logits, self.seq_length)

        #compute the outputs
        self.outputs = tf.nn.softmax(logits)

        #merge all summaries created during decoding
        self.merged = tf.summary.merge_all()
        self.summarywriter = tf.summary.FileWriter(
            logdir="tf-exp/decode_vis", graph=self.graph)
        self.decode_visualisation = False

    #specify that the graph can no longer be modified after this point
    self.graph.finalize()
def get_outputs(self, logits, logits_seq_length):
    '''
    Put the classifier output logits through a softmax

    Args:
        logits: A list containing a 1xO tensor for each timestep where O is
            the classifier output dimension
        logits_seq_length: the logits sequence length

    Returns:
        An NxO tensor containing posterior distributions
    '''

    #convert logits to non sequential data for the softmax computation
    logits = seq_convertors.seq2nonseq(logits, logits_seq_length)

    #softmax(logits, dim=-1, name=None) computes softmax activations:
    #for each batch i and class j,
    #softmax = exp(logits) / reduce_sum(exp(logits), dim)
    return tf.nn.softmax(logits)
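# Illustration only: the row-wise softmax quoted in the comment above,
# softmax = exp(logits) / reduce_sum(exp(logits), dim), written in NumPy
# with the usual max subtraction for numerical stability:
import numpy as np

def softmax(logits):
    '''logits: [N, O] array; returns [N, O] posterior distributions'''
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=-1, keepdims=True)

print(softmax(np.array([[1.0, 1.0], [0.0, np.log(3.0)]])))
# [[0.5, 0.5], [0.25, 0.75]]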
def compute_loss(self, targets, logits, logit_seq_length,
                 target_seq_length):
    '''
    Compute the loss

    Creates the operation to compute the CTC loss for every input frame
    (if you want to have a different loss function, overwrite this method)

    Args:
        targets: a list that contains a Bx1 tensor containing the targets
            for each time step where B is the batch size
        logits: a list that contains a BxO tensor containing the output
            logits for each time step where O is the output dimension
        logit_seq_length: the length of all the input sequences as a vector
        target_seq_length: the length of all the target sequences as a
            vector

    Returns:
        a scalar value containing the loss
    '''

    #get the batch size
    batch_size = int(target_seq_length.get_shape()[0])

    #convert the targets into a sparse tensor representation: one
    #(utterance index, time index) pair per non-padded target label
    indices = tf.concat(0, [
        tf.concat(1, [
            tf.expand_dims(tf.tile([s], [target_seq_length[s]]), 1),
            tf.expand_dims(tf.range(target_seq_length[s]), 1)])
        for s in range(batch_size)])
    values = tf.reshape(
        seq_convertors.seq2nonseq(targets, target_seq_length), [-1])
    shape = [batch_size, len(targets)]
    sparse_targets = tf.SparseTensor(tf.cast(indices, tf.int64), values,
                                     shape)

    #compute the CTC loss, summed over the utterances in the batch
    loss = tf.reduce_sum(tf.nn.ctc_loss(tf.pack(logits), sparse_targets,
                                        logit_seq_length))

    return loss
def __init__(self, classifier, input_dim, max_length):
    '''
    NnetDecoder constructor, creates the decoding graph

    Args:
        classifier: the classifier that will be used for decoding
        input_dim: the input dimension to the nnetgraph
        max_length: the maximum length (in frames) of the input sequences
    '''

    self.graph = tf.Graph()
    self.max_length = max_length

    with self.graph.as_default():

        #create the inputs placeholder
        self.inputs = tf.placeholder(
            tf.float32, shape=[max_length, input_dim], name='inputs')

        #create the sequence length placeholder
        self.seq_length = tf.placeholder(
            tf.int32, shape=[1], name='seq_length')

        #split the inputs into a list of time steps with batch size 1
        split_inputs = tf.unpack(tf.expand_dims(self.inputs, 1))

        #create the decoding graph
        logits, _, self.saver, _ = classifier(split_inputs, self.seq_length,
                                              is_training=False,
                                              reuse=False,
                                              scope='Classifier')

        #convert logits to non sequential data for the softmax computation
        logits = seq_convertors.seq2nonseq(logits, self.seq_length)

        #compute the outputs
        self.outputs = tf.nn.softmax(logits)

    #specify that the graph can no longer be modified after this point
    self.graph.finalize()
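# Illustration only (the feature loading and padding below are assumptions,
# not part of the decoder above): the inputs placeholder expects every
# utterance zero-padded to max_length frames and seq_length the true number
# of frames. A NumPy sketch of preparing one utterance for the placeholders,
# assuming decoder is an NnetDecoder instance:
import numpy as np

def pad_utterance(features, max_length):
    '''features: [num_frames, input_dim] array of acoustic features'''
    num_frames, input_dim = features.shape
    padded = np.zeros((max_length, input_dim), dtype=np.float32)
    padded[:num_frames] = features
    return padded, np.array([num_frames], dtype=np.int32)

utterance = np.random.randn(7, 40).astype(np.float32)  # hypothetical 40-dim features
padded_inputs, seq_length = pad_utterance(utterance, max_length=10)
#these arrays would be fed as feed_dict={decoder.inputs: padded_inputs,
#                                        decoder.seq_length: seq_length}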