def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        inputs = tensors_input.feature_splits[id_gpu]
        len_inputs = tensors_input.len_fea_splits[id_gpu]
        inputs.set_shape([None, None, self.size_embedding])

        if self.type == 'LSTM':
            from tfSeq2SeqModels.decoders.lm_decoder import LM_Decoder
            self.decoder = LM_Decoder(self.args, self.is_train, self.embed_table_decoder)
            logits = self.decoder(inputs, len_inputs)
        elif self.type == 'SelfAttention':
            from tfSeq2SeqModels.decoders.self_attention_lm_decoder import SelfAttentionDecoder
            self.decoder = SelfAttentionDecoder(self.args, self.is_train, self.embed_table_decoder)
            # from tfSeq2SeqModels.decoders.self_attention_lm_decoder_lh import SelfAttentionDecoder_lh
            # decoder = SelfAttentionDecoder_lh(self.args, self.is_train, self.embed_table_decoder)
            logits = self.decoder(inputs, len_inputs)

        len_logits = tensors_input.len_label_splits[id_gpu]
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tensors_input.label_splits[id_gpu],
            logits=logits)
        loss *= tf.sequence_mask(
            tensors_input.len_label_splits[id_gpu],
            maxlen=tf.shape(logits)[1],
            dtype=logits.dtype)

        if self.args.model.confidence_penalty:
            ls_loss = self.args.model.confidence_penalty * confidence_penalty(logits, len_logits)
            ls_loss = tf.reduce_mean(ls_loss)
            loss += ls_loss

        # from tfModels.tensor2tensor.common_layers import padded_cross_entropy, weights_nonzero
        #
        # mask = tf.sequence_mask(
        #     tensors_input.len_label_splits[id_gpu],
        #     maxlen=tf.shape(logits)[1],
        #     dtype=logits.dtype)
        # batch_mask = tf.tile(tf.expand_dims(mask, -1), [1, 1, tf.shape(logits)[-1]])
        # loss, _ = padded_cross_entropy(
        #     logits * batch_mask,
        #     tensors_input.label_splits[id_gpu],
        #     0.0,
        #     weights_fn=weights_nonzero,
        #     reduce_sum=False)
        # loss = tf.Print(loss, [weight_sum], message='weight_sum', summarize=1000)

        if self.is_train:
            with tf.name_scope("gradients"):
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients
    else:
        return loss
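# Note: `confidence_penalty` is imported elsewhere in the repo and not shown in this
# file. A minimal sketch of an entropy-based confidence penalty that matches how it
# is called above (one value per utterance, hence the `ndims == 1` asserts in the
# other builders) might look like the following. The function name and the exact
# normalization are assumptions, not the repo's actual implementation.
import tensorflow as tf

def confidence_penalty_sketch(logits, len_logits):
    # negative entropy of the output distribution, masked over valid timesteps
    log_probs = tf.nn.log_softmax(logits)                 # [batch, time, dim]
    probs = tf.exp(log_probs)
    neg_entropy = tf.reduce_sum(probs * log_probs, -1)    # [batch, time]
    mask = tf.sequence_mask(len_logits,
                            maxlen=tf.shape(logits)[1],
                            dtype=logits.dtype)
    # one penalty value per utterance
    return tf.reduce_sum(neg_entropy * mask, -1)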
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        encoder = self.gen_encoder(
            is_train=self.is_train,
            embed_table=self.embed_table_encoder,
            args=self.args)
        decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=self.embed_table_decoder,
            global_step=self.global_step,
            args=self.args)
        self.schedule = decoder.schedule

        encoded, len_encoded = encoder(
            features=tensors_input.feature_splits[id_gpu],
            len_feas=tensors_input.len_fea_splits[id_gpu])

        decoder_input = decoder.build_input(
            id_gpu=id_gpu,
            tensors_input=tensors_input)
        # during inference, decoder_input.input_labels and len_labels are None
        decoder.build_helper(
            type=self.helper_type,
            labels=decoder_input.input_labels,
            len_labels=decoder_input.len_labels,
            batch_size=tf.shape(len_encoded)[0])

        logits, preds, len_decoded = decoder(encoded, len_encoded)

        if self.is_train:
            if self.args.model.loss_type == 'OCD':
                # logits = tf.Print(logits, [tensors_input.len_label_splits[id_gpu][0]], message='label length: ', summarize=1000)
                # logits = tf.Print(logits, [tf.shape(logits[0])], message='logits shape: ', summarize=1000)
                loss, (optimal_targets, optimal_distributions) = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    preds=preds)
            elif self.args.model.loss_type == 'CE':
                loss = self.ce_loss(
                    logits=logits,
                    labels=decoder_input.output_labels[:, :tf.shape(logits)[1]],
                    len_labels=decoder_input.len_labels)
            elif self.args.model.loss_type == 'Premium_CE':
                table_targets_distributions = tf.nn.softmax(tf.constant(self.args.table_targets))
                loss = self.premium_ce_loss(
                    logits=logits,
                    labels=tensors_input.label_splits[id_gpu],
                    table_targets_distributions=table_targets_distributions,
                    len_labels=tensors_input.len_label_splits[id_gpu])
            else:
                raise NotImplementedError('loss type {} not found!'.format(self.args.model.loss_type))

            with tf.name_scope("gradients"):
                assert loss.get_shape().ndims == 1
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        # no_op is preserved so that debug info can be passed through
        # return loss, gradients, tf.no_op()
        return loss, gradients, [len_decoded, preds, tensors_input.label_splits[id_gpu]]
    else:
        return logits, len_decoded, preds
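# Note: `self.ce_loss` is defined on the model base class and not shown here. One
# plausible form, consistent with the call sites above and the `ndims == 1` asserts
# (a masked per-utterance cross-entropy), is sketched below; the name
# `ce_loss_sketch` and the exact reduction are assumptions, not the repo's code.
import tensorflow as tf

def ce_loss_sketch(logits, labels, len_labels):
    # logits: [batch, time, dim], labels: [batch, time] (possibly pre-sliced to
    # match the logits' time dimension, as done at the CE call site above)
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    mask = tf.sequence_mask(len_labels,
                            maxlen=tf.shape(logits)[1],
                            dtype=logits.dtype)
    # one loss value per utterance
    return tf.reduce_sum(ce * mask, -1)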
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        # create the encoder and decoder objects
        encoder = self.gen_encoder(
            is_train=self.is_train,
            args=self.args)
        decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=None,
            global_step=self.global_step,
            args=self.args)

        features = tensors_input.feature_splits[id_gpu]
        # use the encoder to encode the input sequence
        hidden_output, len_hidden_output = encoder(
            features=features,
            len_feas=tensors_input.len_fea_splits[id_gpu])
        logits, align, len_logits = decoder(hidden_output, len_hidden_output)

        if self.is_train:
            loss = self.ctc_loss(
                logits=logits,
                len_logits=len_logits,
                labels=tensors_input.label_splits[id_gpu],
                len_labels=tensors_input.len_label_splits[id_gpu])

            if self.args.model.balance_training:
                # keep only utterances whose per-token loss exceeds the threshold
                token_loss = loss / tf.to_float(len_logits)
                mask = tf.to_float(tf.greater(token_loss, self.args.model.balance_training))
                loss *= mask

            if self.args.model.confidence_penalty:
                cp_loss = self.args.model.decoder.confidence_penalty * confidence_penalty(
                    logits, len_logits)
                assert cp_loss.get_shape().ndims == 1
                loss += cp_loss

            if self.args.model.constrain_repeated:
                from tfModels.CTCShrink import repeated_constrain_loss

                loss_constrain = repeated_constrain_loss(
                    distribution_acoustic=logits,
                    hidden=hidden_output,
                    len_acoustic=len_hidden_output,
                    blank_id=self.args.dim_output - 1)
                loss += self.args.model.constrain_repeated * loss_constrain

            with tf.name_scope("gradients"):
                assert loss.get_shape().ndims == 1
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, [align, tensors_input.label_splits[id_gpu]]
    else:
        return logits, len_logits
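# Note: `self.ctc_loss` is a wrapper defined elsewhere in the repo. A minimal sketch
# of how such a wrapper is typically built on the standard tf.nn.ctc_loss API
# (dense labels converted to a SparseTensor, per-utterance losses returned) is
# given below; the helper names are assumptions, not the repo's implementation.
import tensorflow as tf

def dense_to_sparse_sketch(labels, len_labels):
    # keep only the valid label positions and pack them into a SparseTensor
    mask = tf.sequence_mask(len_labels, maxlen=tf.shape(labels)[1])
    indices = tf.where(mask)
    values = tf.gather_nd(labels, indices)
    return tf.SparseTensor(indices, values, tf.to_int64(tf.shape(labels)))

def ctc_loss_sketch(logits, len_logits, labels, len_labels):
    sparse_labels = dense_to_sparse_sketch(labels, len_labels)
    # per-utterance CTC loss; logits are batch-major [batch, time, dim]
    return tf.nn.ctc_loss(labels=sparse_labels,
                          inputs=logits,
                          sequence_length=len_logits,
                          time_major=False)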
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        # build the ctc model
        decoded_ctc, _, distribution_ctc = self.ctc_model.list_run

        from tfModels.CTCShrink import feature_shrink_tf
        blank_id = self.args.dim_output - 1
        feature_shrunk, len_shrunk = feature_shrink_tf(
            distribution=distribution_ctc,
            feature=tensors_input.feature_splits[id_gpu],
            len_feature=tensors_input.len_fea_splits[id_gpu],
            blank_id=blank_id,
            frame_expand=self.args.model.frame_expand)
        feature_shrunk = tf.stop_gradient(feature_shrunk)
        len_shrunk = tf.stop_gradient(len_shrunk)

        # build the sequence labelling model
        self.encoder = self.gen_encoder(
            is_train=self.is_train,
            args=self.args)
        self.decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=self.embedding_tabel,
            global_step=self.global_step,
            args=self.args,
            name='decoder')
        self.schedule = self.decoder.schedule

        hidden_output, len_hidden_output = self.encoder(
            features=feature_shrunk,
            len_feas=len_shrunk)

        if (not self.is_train) and (self.args.beam_size > 1):
            # inference phase
            with tf.variable_scope(self.decoder.name or 'decoder'):
                if self.args.dirs.lm_checkpoint:
                    logging.info('beam search with language model ...')
                    logits, decoded, len_decoded = self.decoder.beam_decode_rerank(
                        hidden_output, len_hidden_output)
                else:
                    logging.info('beam search ...')
                    logits, decoded, len_decoded = self.decoder.beam_decode(
                        hidden_output, len_hidden_output)
        else:
            # training phase
            logging.info('greedy search ...')
            logits, decoded, len_decoded = self.decoder(hidden_output, len_hidden_output)

        if self.is_train:
            if self.args.model.decoder_loss == 'CE':
                loss = self.ce_loss(
                    logits=logits,
                    labels=tensors_input.label_splits[id_gpu],
                    len_logits=len_decoded,
                    len_labels=tensors_input.len_label_splits[id_gpu])
            elif self.args.model.decoder_loss == 'OCD':
                loss = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    decoded=decoded,
                    len_decoded=len_decoded)
            else:
                raise NotImplementedError('loss type {} not found for decoder!'.format(
                    self.args.model.decoder_loss))

            with tf.name_scope("gradients"):
                assert loss.get_shape().ndims == 1
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], loss]
        # return loss, gradients, tf.no_op()
    else:
        return logits, len_decoded, decoded
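# Note: `feature_shrink_tf` lives in tfModels.CTCShrink and is not shown here. Its
# core idea is to drop the frames whose CTC posterior argmax is the blank symbol
# before feeding the features to the attention model. A toy single-utterance
# sketch of that idea (ignoring batching, padding, and frame_expand) is below;
# `shrink_one_utt_sketch` is a hypothetical name, not the repo's function.
import tensorflow as tf

def shrink_one_utt_sketch(distribution, feature, blank_id):
    # distribution: [time, dim_output] CTC posteriors, feature: [time, dim_feature]
    keep = tf.not_equal(tf.argmax(distribution, -1), blank_id)
    feature_shrunk = tf.boolean_mask(feature, keep)   # frames predicted as non-blank
    len_shrunk = tf.reduce_sum(tf.to_int32(keep))
    return feature_shrunk, len_shrunk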
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        encoder = self.gen_encoder(
            is_train=self.is_train,
            embed_table=None,
            args=self.args)
        decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=self.embedding_tabel,
            global_step=self.global_step,
            args=self.args)

        with tf.variable_scope(encoder.name or 'encoder'):
            encoded, len_encoded = encoder(
                features=tensors_input.feature_splits[id_gpu],
                len_feas=tensors_input.len_fea_splits[id_gpu])

        with tf.variable_scope(decoder.name or 'decoder'):
            decoder_input = decoder.build_input(
                id_gpu=id_gpu,
                tensors_input=tensors_input)

            if (not self.is_train) or (self.args.model.training_type == 'self-learning'):
                '''
                training_type:
                    - self-learning: the logits depend only on the model itself
                    - teacher-forcing: the logits depend on the labels during training
                '''
                # inference phase
                if self.args.beam_size > 1:
                    logging.info('beam search with language model ...')
                    results, preds, len_decoded = decoder.beam_decode_rerank(
                        encoded, len_encoded)
                else:
                    logging.info('greedy search ...')
                    results, preds, len_decoded = decoder.decoder_with_caching(
                        encoded, len_encoded)
            else:
                logging.info('teacher-forcing training ...')
                decoder_input_labels = decoder_input.input_labels * tf.sequence_mask(
                    decoder_input.len_labels,
                    maxlen=tf.shape(decoder_input.input_labels)[1],
                    dtype=tf.int32)
                logits, preds = decoder.decode(
                    encoded=encoded,
                    len_encoded=len_encoded,
                    decoder_input=decoder_input_labels)

        if self.is_train:
            if self.args.model.loss_type == 'OCD':
                """
                Constrain the max decode length for OCD training, since the model
                will decode up to that length at the beginning. Recommend 30.
                """
                logits = results
                loss, _ = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=decoder_input.output_labels,
                    preds=preds)
            elif self.args.model.loss_type == 'beam_OCD':
                logits, preds, len_decoded, _, _ = results
                batch = tf.shape(logits)[0]
                beam_size = self.args.beam_size
                batch_x_beam = batch * beam_size
                logits = tf.reshape(logits, [batch_x_beam, -1, self.args.dim_output])
                len_decoded = tf.reshape(len_decoded, [-1])
                preds = tf.reshape(preds, [batch_x_beam, -1])
                labels = tf.reshape(
                    tf.tile(decoder_input.output_labels[:, None, :], [1, beam_size, 1]),
                    [batch_x_beam, -1])
                # logits = tf.Print(logits, [batch_x_beam, tf.shape(logits), tf.shape(preds), tf.shape(labels), tf.shape(len_decoded)], message='batch_x_beam, logits, preds, labels, len_decoded: ', summarize=1000)
                loss, _ = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=labels,
                    preds=preds)
            elif self.args.model.loss_type == 'CE':
                loss = self.ce_loss(
                    logits=logits,
                    labels=decoder_input.output_labels,
                    len_labels=decoder_input.len_labels)
            elif self.args.model.loss_type == 'Premium_CE':
                table_targets_distributions = tf.nn.softmax(tf.constant(self.args.table_targets))
                loss = self.premium_ce_loss(
                    logits=logits,
                    labels=decoder_input.output_labels,
                    table_targets_distributions=table_targets_distributions,
                    len_labels=decoder_input.len_labels)
            else:
                raise NotImplementedError('loss type {} not found!'.format(
                    self.args.model.loss_type))

            with tf.name_scope("gradients"):
                assert loss.get_shape().ndims == 1
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        # no_op is preserved so that debug info can be passed through
        return loss, gradients, [preds, tensors_input.label_splits[id_gpu]]
    else:
        return results, len_decoded, preds
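# Note: `self.premium_ce_loss` is defined elsewhere in the repo. Judging from its
# call sites (a softmaxed table of target distributions plus integer labels), one
# plausible reading is a soft-target cross entropy where each reference token
# selects a row of the table; a heavily hedged sketch follows. The name
# `premium_ce_loss_sketch` and the reduction are assumptions, not the repo's code.
import tensorflow as tf

def premium_ce_loss_sketch(logits, labels, table_targets_distributions, len_labels):
    # look up a soft target distribution for each reference token
    targets = tf.nn.embedding_lookup(table_targets_distributions, labels)  # [batch, time, dim]
    log_probs = tf.nn.log_softmax(logits)
    ce = -tf.reduce_sum(targets * log_probs, -1)                            # [batch, time]
    mask = tf.sequence_mask(len_labels,
                            maxlen=tf.shape(logits)[1],
                            dtype=logits.dtype)
    # one loss value per utterance
    return tf.reduce_sum(ce * mask, -1)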
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        encoder = self.gen_encoder(
            is_train=self.is_train,
            args=self.args)
        self.decoder = decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=self.embed_table_decoder,
            global_step=self.global_step,
            args=self.args)
        self.schedule = decoder.schedule

        encoded, len_encoded = encoder(
            features=tensors_input.feature_splits[id_gpu],
            len_feas=tensors_input.len_fea_splits[id_gpu])

        if (not self.is_train) and (self.args.beam_size > 1):
            with tf.variable_scope(decoder.name or 'decoder'):
                # fake logits!
                decoded, logits = decoder.beam_decode_rerank(encoded, len_encoded)
        else:
            logits, decoded, len_decoded = decoder(encoded, len_encoded)

        if self.is_train:
            loss = 0
            if self.args.rna_train:
                rna_loss = self.rna_loss(
                    logits=logits,
                    len_logits=len_encoded,
                    labels=tensors_input.label_splits[id_gpu],
                    len_labels=tensors_input.len_label_splits[id_gpu],
                    encoded=encoded,
                    len_encoded=len_encoded)
                loss += rna_loss
            if self.args.OCD_train > 0:
                ocd_loss = self.args.OCD_train * self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    decoded=decoded,
                    len_decoded=len_decoded)
                assert ocd_loss.get_shape().ndims == loss.get_shape().ndims == 1
                loss = rna_loss + ocd_loss
            else:
                ocd_loss = tf.constant(0)

            with tf.name_scope("gradients"):
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, [decoded, tensors_input.label_splits[id_gpu], ocd_loss]
        # return loss, gradients, tf.no_op()
    else:
        return logits, len_decoded, decoded
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        self.encoder = self.gen_encoder(
            is_train=self.is_train,
            args=self.args)
        self.fc_decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=None,
            global_step=self.global_step,
            args=self.args,
            name='decoder')
        self.decoder = decoder = self.gen_decoder2(
            is_train=self.is_train,
            embed_table=self.embedding_tabel,
            global_step=self.global_step,
            args=self.args,
            name='decoder2')
        self.schedule = decoder.schedule

        hidden_output, len_hidden_output = self.encoder(
            features=tensors_input.feature_splits[id_gpu],
            len_feas=tensors_input.len_fea_splits[id_gpu])
        acoustic, alignment, len_acoustic = self.fc_decoder(hidden_output, len_hidden_output)
        if not self.args.model.train_encoder:
            acoustic = tf.stop_gradient(acoustic)
            len_acoustic = tf.stop_gradient(len_acoustic)
        # used to guide the shrinking of the hidden_output
        distribution_acoustic = tf.nn.softmax(acoustic)

        blank_id = self.args.dim_output - 1
        if self.args.model.true_end2end:
            from tfModels.CTCShrink import acoustic_hidden_shrink_v3
            hidden_shrunk, len_no_blank = acoustic_hidden_shrink_v3(
                distribution_acoustic, hidden_output, len_acoustic, blank_id,
                self.args.model.frame_expand)
        else:
            from tfModels.CTCShrink import acoustic_hidden_shrink_tf
            hidden_shrunk, len_no_blank = acoustic_hidden_shrink_tf(
                distribution_acoustic=distribution_acoustic,
                hidden=hidden_output,
                len_acoustic=len_acoustic,
                blank_id=blank_id,
                frame_expand=self.args.model.frame_expand)

        if (not self.is_train) and (self.args.beam_size > 1):
            # inference phase
            with tf.variable_scope(decoder.name or 'decoder'):
                if self.args.dirs.lm_checkpoint:
                    logging.info('beam search with language model ...')
                    logits, decoded, len_decoded = decoder.beam_decode_rerank(
                        hidden_shrunk, len_no_blank)
                else:
                    logging.info('beam search ...')
                    logits, decoded, len_decoded = decoder.beam_decode(
                        hidden_shrunk, len_no_blank)
        else:
            # training phase
            logging.info('greedy search ...')
            logits, decoded, len_decoded = decoder(hidden_shrunk, len_no_blank)

        if self.is_train:
            if self.args.model.decoder_loss == 'CE':
                ocd_loss = self.ce_loss(
                    logits=logits,
                    labels=tensors_input.label_splits[id_gpu],
                    len_logits=len_acoustic,
                    len_labels=tensors_input.len_label_splits[id_gpu])
            elif self.args.model.decoder_loss == 'OCD':
                ocd_loss = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    decoded=decoded,
                    len_decoded=len_decoded)
            elif self.args.model.decoder_loss == 'Premium_CE':
                table_targets_distributions = tf.nn.softmax(tf.constant(self.args.table_targets))
                ocd_loss = self.premium_ce_loss(
                    logits=logits,
                    labels=tensors_input.label_splits[id_gpu],
                    table_targets_distributions=table_targets_distributions,
                    len_logits=len_decoded,
                    len_labels=tensors_input.len_label_splits[id_gpu])
            elif self.args.model.decoder_loss == 'LM_CE':
                ocd_loss = self.lm_ce_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    decoded=decoded,
                    len_decoded=len_decoded)
            else:
                raise NotImplementedError('loss type {} not found for decoder!'.format(
                    self.args.model.decoder_loss))

            if self.args.model.train_encoder:
                ctc_loss = self.ctc_loss(
                    logits=acoustic,
                    len_logits=len_acoustic,
                    labels=tensors_input.label_splits[id_gpu],
                    len_labels=tensors_input.len_label_splits[id_gpu])
            else:
                ctc_loss = tf.constant(0.0)
            # anneal between the decoder loss and the CTC loss via the schedule
            loss = self.schedule * ocd_loss + (1 - self.schedule) * ctc_loss

            with tf.name_scope("gradients"):
                assert loss.get_shape().ndims == 1
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], distribution_acoustic,
             len_acoustic, len_no_blank, hidden_shrunk, ctc_loss, ocd_loss]
        # return loss, gradients, tf.no_op()
    else:
        return logits, len_decoded, decoded
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        self.encoder = self.gen_encoder(
            is_train=self.is_train,
            args=self.args)
        self.fc_decoder = self.gen_decoder(
            is_train=self.is_train,
            embed_table=None,
            global_step=self.global_step,
            args=self.args,
            name='decoder')
        self.decoder = decoder = self.gen_decoder2(
            is_train=self.is_train,
            embed_table=self.embedding_tabel,
            global_step=self.global_step,
            args=self.args,
            name='decoder2')

        hidden_output, len_hidden_output = self.encoder(
            features=tensors_input.feature_splits[id_gpu],
            len_feas=tensors_input.len_fea_splits[id_gpu])
        logits_acoustic, alignment, len_acoustic = self.fc_decoder(hidden_output, len_hidden_output)
        logits_acoustic = tf.stop_gradient(logits_acoustic)
        len_acoustic = tf.stop_gradient(len_acoustic)
        distribution_acoustic = tf.nn.softmax(logits_acoustic)

        # whether to shrink the hidden states or the acoustic distribution
        if not self.args.model.shrink_hidden:
            hidden_output = distribution_acoustic

        blank_id = self.args.dim_ctc_output - 1 if self.args.dim_ctc_output else self.args.dim_output - 1
        hidden_shrunk, len_no_blank = acoustic_hidden_shrink_tf(
            distribution_acoustic=distribution_acoustic,
            hidden=hidden_output,
            len_acoustic=len_acoustic,
            blank_id=blank_id,
            num_post=self.args.model.num_post,
            frame_expand=self.args.model.frame_expand)

        if (not self.is_train) and (self.args.beam_size > 1):
            # inference phase
            with tf.variable_scope(decoder.name or 'decoder'):
                logits, decoded, len_decoded = decoder.beam_decode_rerank(
                    hidden_shrunk, len_no_blank)
        else:
            # training phase
            logits, decoded, len_decoded = decoder(hidden_shrunk, len_no_blank)

        if self.is_train:
            if self.args.model.use_ce_loss:
                loss = self.ce_loss(
                    logits=logits,
                    labels=tensors_input.label_splits[id_gpu],
                    len_logits=len_acoustic,
                    len_labels=tensors_input.len_label_splits[id_gpu])
            else:
                loss = self.ocd_loss(
                    logits=logits,
                    len_logits=len_decoded,
                    labels=tensors_input.label_splits[id_gpu],
                    decoded=decoded)

            if self.args.model.confidence_penalty > 0:
                # utterance-level confidence penalty, normalized by decode length
                cp_loss = self.args.model.confidence_penalty * \
                    confidence_penalty(logits, len_decoded) / tf.to_float(len_decoded)
                loss += cp_loss

            if self.args.model.musk_update:
                self.idx_update = self.deserve_idx(
                    decoded,
                    len_decoded,
                    tensors_input.label_splits[id_gpu],
                    tensors_input.len_label_splits[id_gpu])
                loss = tf.reshape(tf.gather(loss, self.idx_update), [-1])
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.decoder.params])

            with tf.name_scope("gradients"):
                loss = tf.reduce_mean(loss)
                gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, \
            [decoded, tensors_input.label_splits[id_gpu], l2_loss]
        # return loss, gradients, tf.no_op()
    else:
        return logits, len_decoded, decoded
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        batch_size = 1200 if self.is_train else 3
        state_agent_init = self.agent.zero_state(batch_size)
        state_lm_init = self.env.lm.zero_state(batch_size)
        rewards_lm_init = tf.zeros([batch_size, 0])
        actions_init = tf.zeros([batch_size, 0], dtype=tf.int32)
        logits_init = tf.zeros([batch_size, 0, self.args.dim_output])

        def step(i, state_agent, state_lm, rewards_lm, actions, logits):
            # generate the env state
            state_env = tf.concat(state_lm[-1], -1)

            # the agent takes an action: sample from its policy with prob 0.8,
            # otherwise explore with a uniform distribution
            cur_logit, next_state_agent = self.agent.forward(state_env, state_agent)
            policy = tf.nn.softmax(cur_logit, name='actor_prob')
            action = tf.cond(
                tf.less(self.choose, 0.8),
                lambda: tf.distributions.Categorical(probs=policy).sample(),
                lambda: tf.distributions.Categorical(logits=tf.ones_like(cur_logit)).sample())
            logits = tf.concat([logits, cur_logit[:, None, :]], 1)

            # the env transitions to the next state and returns the rewards
            next_state_lm, reward_lm, info = self.env.step(action, state_lm)
            rewards_lm = tf.concat([rewards_lm, reward_lm[:, None]], 1)
            actions = tf.concat([actions, action[:, None]], 1)

            return i + 1, next_state_agent, next_state_lm, rewards_lm, actions, logits

        _, _, _, rewards_lm, actions, logits = tf.while_loop(
            cond=lambda i, *_: tf.less(i, 20),
            body=step,
            loop_vars=[0, state_agent_init, state_lm_init,
                       rewards_lm_init, actions_init, logits_init],
            shape_invariants=[tf.TensorShape([]),
                              nest.map_structure(lambda t: tf.TensorShape(t.shape), state_agent_init),
                              nest.map_structure(lambda t: tf.TensorShape(t.shape), state_lm_init),
                              tf.TensorShape([None, None]),
                              tf.TensorShape([None, None]),
                              tf.TensorShape([None, None, self.args.dim_output])])

        if self.is_train:
            rewards = rewards_lm
            rewards_discounted = self.discount(self.discount_rate, rewards)
            rewards_discounted = tf.stop_gradient(rewards_discounted)

            crossent = smoothing_cross_entropy(
                logits=logits,
                labels=actions,
                vocab_size=self.args.dim_output,
                confidence=1.0)
            loss = crossent * rewards_discounted
            loss = tf.reduce_mean(loss)
            gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, [tf.reduce_sum(rewards_lm, -1), actions]
    else:
        return actions, rewards_lm
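# Note: `self.discount` is defined elsewhere in the repo. A minimal sketch of a
# discounted-return helper that matches how it is called above (rewards shaped
# [batch, time], returning the discounted suffix sum per step) is given below;
# the function name is an assumption, not the repo's implementation.
import tensorflow as tf

def discount_sketch(discount_rate, rewards):
    # rewards: [batch, time]; returns[t] = rewards[t] + discount_rate * returns[t+1]
    rewards_t = tf.transpose(rewards)                  # [time, batch] for tf.scan
    returns_t = tf.scan(lambda acc, r: r + discount_rate * acc,
                        rewards_t,
                        initializer=tf.zeros_like(rewards_t[0]),
                        reverse=True)
    return tf.transpose(returns_t)                     # back to [batch, time]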
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    tf.get_variable_scope().set_initializer(tf.variance_scaling_initializer(
        1.0, mode="fan_avg", distribution="uniform"))
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        batch_size = tf.shape(tensors_input.len_fea_splits[id_gpu])[0]
        state_agent_init = self.agent.zero_state(batch_size)
        state_lm_init = self.env.lm.zero_state(batch_size)
        rewards_lm_init = tf.zeros([batch_size, 0])
        actions_init = tf.zeros([batch_size, 0], dtype=tf.int32)
        logits_init = tf.zeros([batch_size, 0, self.args.dim_output])

        frames, len_frames = self.processor.process(
            inputs=tensors_input.feature_splits[id_gpu],
            len_inputs=tensors_input.len_fea_splits[id_gpu])
        # frames = tf.stop_gradient(frames)

        def step(i, state_agent, state_lm, rewards_lm, actions, logits):
            # generate the env state from the acoustic frame and the lm state
            state_ac = frames[:, i, :]
            state_env = tf.concat([state_ac, tf.concat(state_lm[-1], -1)], 1)

            # the agent takes an action by sampling from its policy
            cur_logit, next_state_agent = self.agent.forward(state_env, state_agent)
            policy = tf.nn.softmax(cur_logit, name='actor_prob')
            logits = tf.concat([logits, cur_logit[:, None, :]], 1)
            action = tf.distributions.Categorical(probs=policy).sample()

            # the env transitions to the next state and returns the rewards
            next_state_lm, reward_lm, info = self.env.step(action, state_lm)
            rewards_lm = tf.concat([rewards_lm, reward_lm[:, None]], 1)
            actions = tf.concat([actions, action[:, None]], 1)

            return i + 1, next_state_agent, next_state_lm, rewards_lm, actions, logits

        _, _, _, rewards_lm, actions, logits = tf.while_loop(
            cond=lambda i, *_: tf.less(i, tf.shape(frames)[1]),
            body=step,
            loop_vars=[0, state_agent_init, state_lm_init,
                       rewards_lm_init, actions_init, logits_init],
            shape_invariants=[tf.TensorShape([]),
                              nest.map_structure(lambda t: tf.TensorShape(t.shape), state_agent_init),
                              nest.map_structure(lambda t: tf.TensorShape(t.shape), state_lm_init),
                              tf.TensorShape([None, None]),
                              tf.TensorShape([None, None]),
                              tf.TensorShape([None, None, self.args.dim_output])])

        pad_mask = tf.sequence_mask(
            len_frames,
            maxlen=tf.shape(frames)[1],
            dtype=tf.float32)
        rewards_lm *= pad_mask

        if self.is_train:
            q_value = Qvalue(actions, tensors_input.label_splits[id_gpu])
            # rewards_ac: the temporal-difference Q value of each step
            rewards_ac = tf.to_float(q_value[:, 1:] - q_value[:, :-1])
            rewards_lm = tf.zeros_like(rewards_ac)
            rewards_ac *= pad_mask
            rewards = rewards_ac + rewards_lm
            # rewards = tf.Print(rewards, [tf.reduce_sum(rewards_lm, -1)], message='rewards_lm', summarize=1000)
            rewards_discounted = self.discount(self.discount_rate, rewards)
            rewards_discounted = tf.stop_gradient(rewards_discounted)

            crossent = smoothing_cross_entropy(
                logits=logits,
                labels=actions,
                vocab_size=self.args.dim_output,
                confidence=1.0)
            # crossent = tf.Print(crossent, [tf.reduce_sum(crossent)], message='crossent: ', summarize=1000)
            loss = crossent * rewards_discounted * pad_mask
            loss = tf.reduce_mean(loss)
            gradients = self.optimizer.compute_gradients(loss)

    self.__class__.num_Model += 1
    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Model))

    if self.is_train:
        return loss, gradients, \
            [tf.reduce_sum(rewards_ac, -1), tf.reduce_sum(rewards_lm, -1), actions]
    else:
        return actions, rewards_lm
def build_single_graph(self, id_gpu, name_gpu, tensors_input):
    """
    Build the infer model or the train model on one device,
    conditioned on self.is_train.
    """
    num_cell_units = self.args.model.num_cell_units
    cell_type = self.args.model.cell_type
    dropout = self.args.model.dropout
    forget_bias = self.args.model.forget_bias
    use_residual = self.args.model.use_residual

    hidden_output = tensors_input.feature_splits[id_gpu]
    with tf.device(lambda op: choose_device(op, name_gpu, self.center_device)):
        for i in range(self.args.model.num_lstm_layers):
            # build one layer: build the block, then connect the block
            single_cell = build_cell(
                num_units=num_cell_units,
                num_layers=1,
                is_train=self.is_train,
                cell_type=cell_type,
                dropout=dropout,
                forget_bias=forget_bias,
                use_residual=use_residual)
            hidden_output, _ = cell_forward(
                cell=single_cell,
                inputs=hidden_output,
                index_layer=i)
            hidden_output = fully_connected(
                inputs=hidden_output,
                num_outputs=num_cell_units,
                activation_fn=tf.nn.tanh,
                scope='wx_b' + str(i))
            if self.args.model.use_layernorm:
                hidden_output = layer_norm(hidden_output)

        logits = fully_connected(
            inputs=hidden_output,
            num_outputs=self.args.dim_output,
            activation_fn=tf.identity,
            scope='fully_connected')
        # flatten the time axis so logits align with the flattened labels and masks
        logits = tf.reshape(logits, [-1, self.args.dim_output])

        # Accuracy
        with tf.name_scope("label_accuracy"):
            correct = tf.nn.in_top_k(
                logits, tf.reshape(tensors_input.label_splits[id_gpu], [-1]), 1)
            correct = tf.multiply(
                tf.cast(correct, tf.float32),
                tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
            label_accuracy = tf.reduce_sum(correct)

        # Cross-entropy loss
        with tf.name_scope("CE_loss"):
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.reshape(tensors_input.label_splits[id_gpu], [-1]),
                logits=logits)
            cross_entropy = tf.multiply(
                cross_entropy,
                tf.reshape(tensors_input.mask_splits[id_gpu], [-1]))
            cross_entropy_loss = tf.reduce_sum(cross_entropy) / tf.reduce_sum(
                tensors_input.mask_splits[id_gpu])
            loss = cross_entropy_loss

        if self.is_train:
            with tf.name_scope("gradients"):
                gradients = self.optimizer.compute_gradients(loss)

    logging.info('\tbuild {} on {} successfully! total model number: {}'.format(
        self.__class__.__name__, name_gpu, self.__class__.num_Instances))

    return loss, gradients if self.is_train else logits
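# Note: each builder above returns a per-GPU (loss, gradients, ...) tuple keyed by
# id_gpu/name_gpu. The surrounding training loop is not shown here; a hedged sketch
# of the standard multi-tower pattern for combining the per-GPU gradients (dense
# gradients assumed, helper name is hypothetical) might look like this:
import tensorflow as tf

def average_gradients_sketch(tower_grads):
    # tower_grads: one list of (grad, var) pairs per GPU, all over the same variables
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        var = grads_and_vars[0][1]
        grads = [g for g, _ in grads_and_vars if g is not None]
        if not grads:
            continue
        averaged.append((tf.reduce_mean(tf.stack(grads, 0), 0), var))
    return averaged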