def body(self, features):
  hparams = self._hparams
  targets = features["targets"]
  inputs = features["inputs"]
  target_space = features["target_space_id"]

  inputs = common_layers.flatten4d3d(inputs)
  targets = common_layers.flatten4d3d(targets)

  (encoder_input, encoder_self_attention_bias,
   encoder_decoder_attention_bias) = transformer.transformer_prepare_encoder(
       inputs, target_space, hparams)
  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams)

  encoder_input = tf.nn.dropout(
      encoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
  decoder_input = tf.nn.dropout(
      decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

  encoder_output = transformer_revnet_encoder(
      encoder_input, encoder_self_attention_bias, hparams)

  decoder_output = transformer_revnet_decoder(
      decoder_input, encoder_output, decoder_self_attention_bias,
      encoder_decoder_attention_bias, hparams)
  decoder_output = tf.expand_dims(decoder_output, 2)

  return decoder_output
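# A minimal, hedged sketch of the prepare-decoder call shared by the snippets
# on this page. It assumes tensor2tensor is installed and uses the stock
# transformer_base() hparams; the tensors are dummies chosen only to show the
# shapes from the docstrings below. transformer_prepare_decoder right-shifts
# the targets and returns them together with a causal (lower-triangular)
# self-attention bias.
import tensorflow as tf
from tensor2tensor.layers import common_layers
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()
targets = tf.zeros([8, 16, 1, hparams.hidden_size])  # [batch, length, 1, hidden]
targets = common_layers.flatten4d3d(targets)         # -> [8, 16, hidden]
decoder_input, decoder_self_attention_bias = (
    transformer.transformer_prepare_decoder(targets, hparams))
# Dropout on the prepared input, as in the bodies below.
decoder_input = tf.nn.dropout(
    decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)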
def body(self, features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs. [batch_size, input_length, 1,
            hidden_dim].
        "targets": Target decoder outputs. [batch_size, decoder_length, 1,
            hidden_dim]
        "target_space_id": A scalar int from data_generators.problem.SpaceID.

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  losses = []

  # The encoder is deliberately disabled in this decoder-only variant.
  # if self.has_input:
  #   inputs = features["inputs"]
  #   target_space = features["target_space_id"]
  #   encoder_output, encoder_decoder_attention_bias = self.encode(
  #       inputs, target_space, hparams, features=features, losses=losses)
  # else:
  encoder_output, encoder_decoder_attention_bias = (None, None)

  # Decode from "inputs" when present, otherwise from "targets".
  if "inputs" in features:
    targets = features["inputs"]
    target_key = "inputs"
  else:
    targets = features["targets"]
    target_key = "targets"

  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)
  decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
      targets, hparams, features=features)
  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=features_to_nonpadding(features, target_key),
      losses=losses)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, {"extra_loss": tf.add_n(losses)}
  else:
    return ret
def body(self, features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs. [batch_size, input_length, 1,
            hidden_dim].
        "targets": Target decoder outputs. [batch_size, decoder_length, 1,
            hidden_dim]
        "target_space_id": A scalar int from data_generators.problem.SpaceID.

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  losses = []

  if self.has_input:
    raise AttributeError("Context transformer encoder not implemented")
    # NOTE: unreachable until the context encoder is implemented.
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    encoder_output, encoder_decoder_attention_biases = self.encode(
        inputs, target_space, hparams, features=features, losses=losses)
  else:
    encoder_output, encoder_decoder_attention_biases = (None, None)

  targets = features["targets"]
  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)
  decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
      targets, hparams, features=features)
  decoder_self_attention_biases = expand_bias_modes(
      decoder_self_attention_bias, features["targets_seg"])

  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_biases,
      decoder_self_attention_biases,
      hparams,
      nonpadding=features_to_nonpadding(features, "targets"),
      losses=losses)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, {"extra_loss": tf.add_n(losses)}
  else:
    return ret
def decode_transformer(encoder_output, encoder_decoder_attention_bias,
                       targets, hparams, name, task=None):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    if task is None:
      task = hparams.task
    if task == "translate":
      targets = common_layers.flatten4d3d(targets)

      decoder_input, decoder_self_bias = (
          transformer.transformer_prepare_decoder(targets, hparams))

      decoder_input = tf.nn.dropout(
          decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)

      decoder_output = transformer.transformer_decoder(
          decoder_input, encoder_output, decoder_self_bias,
          encoder_decoder_attention_bias, hparams)
      decoder_output = tf.expand_dims(decoder_output, axis=2)
    else:
      assert task == "image"
      # NOTE: `inputs` (the class-label embedding) is not threaded through
      # here, so the addition below only works when hparams.drop_inputs is
      # set.
      inputs = None
      # Have to reshape targets as [b, 32, 32, 3 * hidden_size] because
      # otherwise prepare_image will choke.
      targets = tf.reshape(targets, [
          tf.shape(targets)[0], hparams.img_len, hparams.img_len,
          hparams.num_channels * hparams.hidden_size
      ])

      # Prepare decoder inputs and bias.
      decoder_input, _, _, bias = cia.prepare_decoder(targets, hparams)
      # Add class label to decoder input.
      if not hparams.drop_inputs:
        decoder_input += tf.reshape(
            inputs,
            [common_layers.shape_list(targets)[0], 1, 1, hparams.hidden_size])
      decoder_output = cia.transformer_decoder_layers(
          decoder_input,
          None,
          bias,
          hparams.num_decoder_layers or hparams.num_hidden_layers,
          hparams,
          attention_type=hparams.dec_attention_type,
          name="decoder")
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(
        decoder_output, [decoder_output_shape[0], -1, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output
def body(self, features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id"

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams

  if self.has_input:
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    encoder_output, encoder_decoder_attention_bias = self.encode(
        inputs, target_space, hparams, features=features)
  else:
    encoder_output, encoder_decoder_attention_bias = (None, None)

  targets = features["targets"]
  targets = common_layers.flatten4d3d(targets)

  decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
      targets, hparams, features=features)

  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=features_to_nonpadding(features, "targets"))

  # Side effect: write the decoded batch into the sentence cache.
  self.cache_flag = tf.py_func(
      self.sentence_cache.AddMultipleEntries,
      [features["targets_raw"], decoder_output],
      tf.float32,
  )
  self.cache_flag = tf.cast(self.cache_flag, tf.float32)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  self.cache_flag.set_shape((1,))
  # Adding 0 * cache_flag keeps the cache-writing py_func in the graph.
  return decoder_output + 0 * self.cache_flag
def vae_transformer_internal(inputs, targets, target_space, hparams):
  """VAE Transformer, main step used for training."""
  with tf.variable_scope("vae_transformer"):
    is_training = hparams.mode == tf.contrib.learn.ModeKeys.TRAIN
    # Prepare inputs, targets, and k.
    inputs = common_layers.flatten4d3d(inputs)
    input_len = tf.shape(inputs)[1]
    # Double input size to cover targets.
    inputs = tf.pad(inputs, [[0, 0], [0, input_len], [0, 0]])
    inputs.set_shape([None, None, hparams.hidden_size])
    targets = common_layers.flatten4d3d(targets)
    k = 2**hparams.num_compress_steps
    inputs, targets = common_layers.pad_to_same_length(
        inputs, targets, final_length_divisible_by=k)
    inputs = encode(inputs, target_space, hparams, "input_enc")

    # Dropout targets or swap for zeros 5% of the time.
    targets_nodrop = targets
    max_prestep = hparams.kl_warmup_steps
    prob_targets = 0.95 if is_training else 1.0
    targets_dropout_max = common_layers.inverse_lin_decay(max_prestep) - 0.01
    targets = dropmask(targets, targets_dropout_max * 0.7, is_training)
    targets = tf.cond(
        tf.less(tf.random_uniform([]), prob_targets),
        lambda: targets,
        lambda: tf.zeros_like(targets))
    # NOTE: this reassignment disables the dropout/zeroing above.
    targets = targets_nodrop

    # Compress and vae.
    z = tf.get_variable("z", [hparams.hidden_size])
    z = tf.reshape(z, [1, 1, 1, -1])
    z = tf.tile(z, [tf.shape(inputs)[0], 1, 1, 1])

    z = attend(z, inputs, hparams, "z_attendsi")
    z = ffn(z, hparams, "zff2")
    z = attend(z, targets, hparams, "z_attendst2")
    z = ffn(z, hparams, "zff3")
    z, kl_loss, _, _ = vae(z, hparams, name="vae")
    z = tf.layers.dense(z, hparams.hidden_size, name="z_to_dense")

    # z, kl_loss, _, _ = vae_compress(
    #     tf.expand_dims(targets, axis=2), tf.expand_dims(inputs, axis=2),
    #     hparams, "vae_compress", "vae_decompress")

    decoder_in = tf.squeeze(z, axis=2) + tf.zeros_like(targets)
    (decoder_input, decoder_self_attention_bias) = (
        transformer.transformer_prepare_decoder(decoder_in, hparams))
    ret = transformer.transformer_decoder(
        decoder_input, inputs, decoder_self_attention_bias, None, hparams)

    kl_loss *= common_layers.inverse_exp_decay(int(max_prestep * 1.5)) * 5.0
    losses = {"kl": kl_loss}
    return tf.expand_dims(ret, axis=2), losses
def _prepare_decoder(self, targets):
  """Process the transformer decoder input."""
  targets = common_layers.flatten4d3d(targets)
  deco_input, deco_self_attention_bias = (
      transformer.transformer_prepare_decoder(
          targets, self._hparams, features=None))
  deco_input = tf.nn.dropout(
      deco_input, 1.0 - self._hparams.layer_prepostprocess_dropout)
  return deco_input, deco_self_attention_bias
def decode(self, decoder_input, encoder_output,
           encoder_decoder_attention_bias, hparams):
  decoder_input, decoder_self_attention_bias = (
      transformer.transformer_prepare_decoder(decoder_input, hparams))
  decoder_input = tf.nn.dropout(
      decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
  decoder_output = transformer.transformer_decoder(
      decoder_input,
      encoder_output,
      decoder_self_attention_bias,
      encoder_decoder_attention_bias,
      hparams,
      cache=None)
  return decoder_output
def body(self, features):
  hparams = self._hparams
  losses = []

  contexts = {}
  for feature_name in features:
    if "context" in feature_name and "raw" not in feature_name:
      contexts[feature_name] = features[feature_name]

  inputs = features["inputs"]
  target_space = features["target_space_id"]
  encoder_output, encoder_decoder_attention_bias = self.encode(
      inputs,
      contexts,
      target_space,
      hparams=hparams,
      features=features,
      losses=losses)

  targets = features["targets"]
  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)
  decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
      targets, hparams, features=features)
  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams=hparams,
      nonpadding=features_to_nonpadding(features, "targets"),
      losses=losses)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, {"extra_loss": tf.add_n(losses)}
  else:
    return ret
def decode_transformer(encoder_output, encoder_decoder_attention_bias,
                       targets, hparams, name):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    targets = common_layers.flatten4d3d(targets)
    decoder_input, decoder_self_bias = (
        transformer.transformer_prepare_decoder(targets, hparams))
    decoder_input = tf.nn.dropout(
        decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
    decoder_output = transformer.transformer_decoder(
        decoder_input, encoder_output, decoder_self_bias,
        encoder_decoder_attention_bias, hparams)
    # Expand since t2t expects 4d tensors.
    return tf.expand_dims(decoder_output, axis=2)
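# Hedged driver for the decode_transformer just above. All tensors are
# dummies, the scope name "decoder" is arbitrary, and tensor2tensor is
# assumed to be installed; the point is only to show the expected shapes
# of the two encoder-side arguments.
import tensorflow as tf
from tensor2tensor.layers import common_attention
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()
encoder_output = tf.zeros([4, 20, hparams.hidden_size])
# A zero padding indicator yields a bias that masks nothing.
enc_dec_bias = common_attention.attention_bias_ignore_padding(
    tf.zeros([4, 20]))
targets = tf.zeros([4, 12, 1, hparams.hidden_size])  # 4D, as t2t expects
decoder_output = decode_transformer(
    encoder_output, enc_dec_bias, targets, hparams, name="decoder")
# decoder_output: [4, 12, 1, hidden_size]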
def decode(self, decoder_input, encoder_output,
           encoder_decoder_attention_bias_slices, hparams):
  dir_path = os.path.dirname(os.path.realpath(__file__))
  config_file = os.path.join(dir_path, "config.yml")
  with open(config_file) as f:
    config = yaml.safe_load(f)
  enc_name = config["model_params"].split("_")[0][3:]
  dec_name = "dec1d"
  if enc_name.endswith("2d") and enc_name != "all2d":
    dec_name = "dec2d"

  decoder_input, decoder_self_attention_bias = (
      transformer.transformer_prepare_decoder(decoder_input, hparams))
  decoder_input = tf.nn.dropout(
      decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
  decoder_output = getattr(decode_fn, dec_name)(
      decoder_input, encoder_output, decoder_self_attention_bias,
      encoder_decoder_attention_bias_slices, hparams, "decoder")
  return decoder_output
def decode_transformer(encoder_output, encoder_decoder_attention_bias,
                       targets, hparams, name):
  """Original Transformer decoder."""
  with tf.variable_scope(name):
    targets = common_layers.flatten4d3d(targets)
    decoder_input, decoder_self_bias = (
        transformer.transformer_prepare_decoder(targets, hparams))
    decoder_input = tf.nn.dropout(
        decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
    decoder_output = transformer.transformer_decoder(
        decoder_input, encoder_output, decoder_self_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, axis=2)
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(
        decoder_output, [decoder_output_shape[0], -1, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    return decoder_output
def model_fn_body(self, features):
  hparams = self._hparams
  targets = features["targets"]
  inputs = features.get("inputs")
  target_space = features.get("target_space_id")

  inputs = common_layers.flatten4d3d(inputs)
  targets = common_layers.flatten4d3d(targets)

  (encoder_input, encoder_attention_bias,
   _) = transformer.transformer_prepare_encoder(inputs, target_space, hparams)
  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams)

  # We need masks of the form batch size x input sequences.
  # Biases seem to be of the form batch_size x 1 x input sequences x vec dim.
  # Squeeze out dim one, and get the first element of each vector.
  encoder_mask = tf.squeeze(encoder_attention_bias, [1])[:, :, 0]
  decoder_mask = tf.squeeze(decoder_self_attention_bias, [1])[:, :, 0]

  def residual_fn(x, y):
    return common_layers.layer_norm(
        x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

  encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout)
  decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout)

  encoder_output = alt_transformer_encoder(
      encoder_input, residual_fn, encoder_mask, hparams)

  decoder_output = alt_transformer_decoder(
      decoder_input, encoder_output, residual_fn, decoder_mask,
      encoder_attention_bias, hparams)

  decoder_output = tf.expand_dims(decoder_output, 2)

  return decoder_output
def model_fn_body(self, features):
  hparams = self._hparams

  encoder_input = features["inputs"]
  print(encoder_input.shape.as_list())  # ==> [None, None, None, 4, 300]
  encoder_input = flatten5d4d(encoder_input)
  print(encoder_input.shape.as_list())  # ==> [None, None, 4, 300]

  target_space = features["target_space_id"]
  print(target_space.shape.as_list())  # ==> []

  # encode_lex
  encoder_output, encoder_decoder_attention_bias = self.encode_lex(
      encoder_input, target_space, hparams)

  targets = features["targets"]
  print(targets.shape.as_list())
  targets = common_layers.flatten4d3d(targets)

  # decode_lex
  decoder_input, decoder_self_attention_bias = (
      transformer.transformer_prepare_decoder(targets, hparams))

  decoder_output = self.decode(
      decoder_input, encoder_output, encoder_decoder_attention_bias,
      decoder_self_attention_bias, hparams)
  return decoder_output
def model_fn_body(self, features):
  hparams = self._hparams
  targets = features["targets"]
  inputs = features.get("inputs")
  target_space = features.get("target_space_id")

  inputs = common_layers.flatten4d3d(inputs)
  targets = common_layers.flatten4d3d(targets)

  (encoder_input, encoder_attention_bias,
   _) = transformer.transformer_prepare_encoder(inputs, target_space, hparams)
  (decoder_input,
   _) = transformer.transformer_prepare_decoder(targets, hparams)

  encoder_mask = bias_to_mask(encoder_attention_bias)

  def residual_fn(x, y):
    return common_layers.layer_norm(
        x + tf.nn.dropout(y, 1.0 - hparams.residual_dropout))

  encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout)
  decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout)

  encoder_output = alt_transformer_encoder(
      encoder_input, residual_fn, encoder_mask, hparams)

  decoder_output = alt_transformer_decoder(
      decoder_input, encoder_output, residual_fn, encoder_attention_bias,
      hparams)

  decoder_output = tf.expand_dims(decoder_output, 2)

  return decoder_output
def decode_transformer(encoder_output, encoder_decoder_attention_bias,
                       targets, hparams, name):
  """Original Transformer decoder."""
  orig_hparams = hparams
  with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
    targets = common_layers.flatten4d3d(targets)
    decoder_input, decoder_self_bias = (
        transformer.transformer_prepare_decoder(targets, hparams))
    decoder_input = tf.nn.dropout(
        decoder_input, 1.0 - hparams.layer_prepostprocess_dropout)
    decoder_output = transformer.transformer_decoder(
        decoder_input, encoder_output, decoder_self_bias,
        encoder_decoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, axis=2)
    decoder_output_shape = common_layers.shape_list(decoder_output)
    decoder_output = tf.reshape(
        decoder_output, [decoder_output_shape[0], -1, 1, hparams.hidden_size])
    # Expand since t2t expects 4d tensors.
    hparams = orig_hparams
    return decoder_output
def body(self, features, original_features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id"

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams

  snippets = features.get(searchqa_problem.FeatureNames.SNIPPETS)
  questions = features.get(searchqa_problem.FeatureNames.QUESTION)
  target_space = features["target_space_id"]

  with tf.variable_scope("input"):
    # [batch_size, search_results_len, embed_sz]
    encoded_snippets = self.inputs_encoding(
        input=snippets,
        original_input=original_features.get(
            searchqa_problem.FeatureNames.SNIPPETS),
        initializer=tf.constant_initializer(1.0),
        scope="snippets_encoding")

    # [batch_size, 1, embed_sz]
    encoded_question = self.inputs_encoding(
        input=questions,
        original_input=original_features.get(
            searchqa_problem.FeatureNames.QUESTION),
        initializer=tf.constant_initializer(1.0),
        scope="question_encoding")

  # Concat snippets and questions to create the inputs.
  inputs = tf.concat([encoded_snippets, encoded_question], axis=1)
  # The input is 4D by default and it gets squeezed from 4D to 3D in the
  # encode function, so we need to make it 4D by inserting a channel dim.
  inputs = tf.expand_dims(inputs, axis=2)

  losses = []
  encoder_output, encoder_decoder_attention_bias = self.encode(
      inputs, target_space, hparams, features=features, losses=losses)

  targets = features["targets"]
  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)
  decoder_input, decoder_self_attention_bias = (
      transformer.transformer_prepare_decoder(
          targets, hparams, features=features))
  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=features_to_nonpadding(features, "targets"),
      losses=losses)

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, {"extra_loss": tf.add_n(losses)}
  else:
    return ret
def get_decoder_autoregressive():
  """Decoder input for autoregressive computation."""
  (a, b) = transformer.transformer_prepare_decoder(targets, hparams)
  return (a, b, tf.constant(0.0))
def body(self, features):
  """Universal Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id"

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  if hparams.add_position_timing_signal:
    # Turning off addition of positional embedding in the encoder/decoder
    # preparation as we do it in the beginning of each step.
    hparams.pos = None

  if self.has_input:
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = self.encode(
         inputs, target_space, hparams, features=features)
  else:
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = (None, None, (None, None))

  targets = features["targets"]
  targets = common_layers.flatten4d3d(targets)

  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams, features=features)

  decoder_output, dec_extra_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=transformer.features_to_nonpadding(features, "targets"))

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    print("returning attention loss")
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
    print("returning act loss")
    if self.has_input:
      enc_ponder_times, enc_remainders = enc_extra_output
      enc_act_loss = (
          hparams.act_loss_weight *
          tf.reduce_mean(enc_ponder_times + enc_remainders))
    else:
      enc_act_loss = 0.0

    (dec_ponder_times, dec_remainders) = dec_extra_output
    dec_act_loss = (
        hparams.act_loss_weight *
        tf.reduce_mean(dec_ponder_times + dec_remainders))
    act_loss = enc_act_loss + dec_act_loss
    tf.contrib.summary.scalar("act_loss", act_loss)
    return decoder_output, {"act_loss": act_loss}

  # grads = get_grads_and_vars(attention_loss)
  # dec_out_and_grads = tf.concat([decoder_output, grads], 1)  # 0 or 1?
  access_output, access_state = self._access(decoder_output, dec_extra_output)
  return decoder_output, DNCState(
      access_output=access_output,
      access_state=access_state,
      controller_state=dec_extra_output)
def body(self, features):
  """Universal Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id"

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  if hparams.add_position_timing_signal:
    # Turning off addition of positional embedding in the encoder/decoder
    # preparation as we do it in the beginning of each step.
    hparams.pos = None

  if self.has_input:
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = self.encode(
         inputs, target_space, hparams, features=features)
  else:
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = (None, None, (None, None))

  targets = features["targets"]
  targets = common_layers.flatten4d3d(targets)

  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams, features=features)

  decoder_output, dec_extra_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=transformer.features_to_nonpadding(features, "targets"))

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
    if self.has_input:
      enc_ponder_times, enc_remainders = enc_extra_output
      enc_act_loss = (
          hparams.act_loss_weight *
          tf.reduce_mean(enc_ponder_times + enc_remainders))
    else:
      enc_act_loss = 0.0

    (dec_ponder_times, dec_remainders) = dec_extra_output
    dec_act_loss = (
        hparams.act_loss_weight *
        tf.reduce_mean(dec_ponder_times + dec_remainders))
    act_loss = enc_act_loss + dec_act_loss
    tf.contrib.summary.scalar("act_loss", act_loss)
    return decoder_output, {"act_loss": act_loss}

  return decoder_output
def get_decoder_from_vae():
  """Decoder input computed by VAE."""
  # Return decoder stuff.
  (a, b) = transformer.transformer_prepare_decoder(
      tf.squeeze(z, axis=2), hparams)
  return (a, b, kl_loss)
def body(self, features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs. [batch_size, input_length, 1,
            hidden_dim].
        "targets": Target decoder outputs. [batch_size, decoder_length, 1,
            hidden_dim]
        "target_space_id": A scalar int from data_generators.problem.SpaceID.

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  losses = []

  if self.has_input:
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    encoder_output, encoder_decoder_attention_bias = self.encode(
        inputs, target_space, hparams, features=features, losses=losses)
  else:
    encoder_output, encoder_decoder_attention_bias = (None, None)

  targets = features["targets"]
  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)

  left_decoder_input, left_decoder_self_attention_bias = (
      transformer_prepare_decoder(targets, hparams, features=features))
  right_decoder_input, right_decoder_self_attention_bias = (
      transformer_prepare_decoder_right(targets, hparams, features=features))

  non_pad = features_to_nonpadding(features, "targets")

  with tf.variable_scope("left_decoder"):
    left_decoder_output = self.decode(
        left_decoder_input,
        encoder_output,
        encoder_decoder_attention_bias,
        left_decoder_self_attention_bias,
        hparams,
        nonpadding=non_pad,
        losses=losses)

  with tf.variable_scope("right_decoder"):
    right_decoder_output = self.decode(
        right_decoder_input,
        encoder_output,
        encoder_decoder_attention_bias,
        right_decoder_self_attention_bias,
        hparams,
        nonpadding=non_pad,
        losses=losses)

  decoder_output = transformer_bidirectional_joint_decoder(
      tf.squeeze(left_decoder_output, axis=2),
      tf.squeeze(right_decoder_output, axis=2),
      encoder_output,
      encoder_decoder_attention_bias,
      hparams,
      nonpadding=non_pad,
      save_weights_to=self.attention_weights,
      losses=losses)
  decoder_output = tf.expand_dims(decoder_output, axis=2)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, {"extra_loss": tf.add_n(losses)}
  else:
    return ret
def body(self, features):
  """R-Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id"

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams

  if self.has_input:
    inputs = features["inputs"]
    target_space = features["target_space_id"]
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = self.encode(
         inputs, target_space, hparams, features=features)
  else:
    (encoder_output, encoder_decoder_attention_bias,
     enc_extra_output) = (None, None, (None, None))

  targets = features["targets"]
  targets = common_layers.flatten4d3d(targets)

  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams, features=features)

  decoder_output, dec_extra_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=transformer.features_to_nonpadding(features, "targets"))

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  if hparams.recurrence_type == "act" and hparams.act_loss_weight != 0:
    if self.has_input:
      enc_ponder_times, enc_remainders = enc_extra_output
      enc_act_loss = (
          hparams.act_loss_weight *
          tf.reduce_mean(enc_ponder_times + enc_remainders))
    else:
      enc_act_loss = 0.0

    (dec_ponder_times, dec_remainders) = dec_extra_output
    dec_act_loss = (
        hparams.act_loss_weight *
        tf.reduce_mean(dec_ponder_times + dec_remainders))
    act_loss = enc_act_loss + dec_act_loss
    tf.summary.scalar("act_loss", act_loss)
    return decoder_output, {"act_loss": act_loss}

  return decoder_output
def body(self, features):
  """CopyTransformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "targets_*": Additional decoder outputs to generate, for copying and
            pointing; [batch_size, decoder_length]
        "target_space_id": A scalar int from data_generators.problem.SpaceID.

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  hparams = self._hparams
  losses = []

  inputs = features["inputs"]
  target_space = features["target_space_id"]
  encoder_output, encoder_decoder_attention_bias = self.encode(
      inputs, target_space, hparams, features=features, losses=losses)

  if "targets_actions" in features:
    targets = features["targets_actions"]
  else:
    tf.logging.warn(
        "CopyTransformer must be used with a SemanticParsing problem with a "
        "ShiftReduceGrammar; bad things will happen otherwise")
    targets = features["targets"]

  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)
  decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
      targets, hparams, features=features)
  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=features_to_nonpadding(features, "targets"),
      losses=losses)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  decoder_output = tf.reshape(decoder_output, targets_shape)

  body_output = dict()
  target_modality = (
      self._problem_hparams.target_modality
      if self._problem_hparams else {"targets": None})

  assert hparams.pointer_layer in ("attentive", "decaying_attentive")

  for key, modality in target_modality.items():
    if isinstance(modality, CopyModality):
      with tf.variable_scope("copy_layer/" + key):
        if hparams.pointer_layer == "decaying_attentive":
          output_layer = DecayingAttentivePointerLayer(encoder_output)
        else:
          output_layer = AttentivePointerLayer(encoder_output)
        scores = output_layer(decoder_output)
        scores += encoder_decoder_attention_bias
        body_output[key] = scores
    else:
      body_output[key] = decoder_output

  if losses:
    return body_output, {"extra_loss": tf.add_n(losses)}
  else:
    return body_output
def body(self, features):
  """Transformer main model_fn.

  Args:
    features: Map of features to the model. Should contain the following:
        "inputs": Transformer inputs [batch_size, input_length, hidden_dim]
        "targets": Target decoder outputs.
            [batch_size, decoder_length, hidden_dim]
        "target_space_id": A scalar int from data_generators.problem.SpaceID.

  Returns:
    Final decoder representation. [batch_size, decoder_length, hidden_dim]
  """
  tf.logging.info("Using PgScratch BODY function.")
  hparams = self._hparams

  losses = {}
  inputs = features["inputs"]
  target_space = features["target_space_id"]
  # encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
  # encoder_decoder_attention_bias: <tf.float32>[batch_size, input_length]
  encoder_output, encoder_decoder_attention_bias = self.encode(
      inputs, target_space, hparams, features=features, losses=losses)

  with tf.variable_scope("knowledge"):
    with tf.name_scope("knowledge_encoding"):
      # Encode knowledge.
      # <tf.float32>[batch_size, triple_num, emb_dim]
      fact_embedding, fact_lengths = self.encode_knowledge_bottom(features)
      tf.logging.info("Encoded knowledge")

    with tf.name_scope("knowledge_selection_and_loss"):
      # Compute knowledge selection and loss.
      (triple_logits, avg_triple_selection_loss, knowledge_encoder_output,
       transe_loss) = self.compute_knowledge_selection_and_loss(
           features, encoder_output, fact_embedding, fact_lengths,
           hparams.margin, hparams.num_negative_samples)
      losses["kb_loss"] = avg_triple_selection_loss
      losses["transe_loss"] = transe_loss

  if hparams.attend_kb:
    tf.logging.info("ATTEND_KB is ACTIVE")
    with tf.name_scope("knowledge_attention"):
      knowledge_padding = tf.zeros_like(triple_logits, dtype=tf.float32)
      knowledge_attention_bias = (
          common_attention.attention_bias_ignore_padding(knowledge_padding))
      encoder_output = tf.concat(
          [knowledge_encoder_output, encoder_output], 1)
      encoder_decoder_attention_bias = tf.concat(
          [knowledge_attention_bias, encoder_decoder_attention_bias], -1)
  else:
    tf.logging.info("ATTEND_KB is INACTIVE")

  targets = features["targets"]
  targets_shape = common_layers.shape_list(targets)
  targets = common_layers.flatten4d3d(targets)

  (decoder_input,
   decoder_self_attention_bias) = transformer.transformer_prepare_decoder(
       targets, hparams, features=features)

  decode_kwargs = {}
  decoder_output = self.decode(
      decoder_input,
      encoder_output,
      encoder_decoder_attention_bias,
      decoder_self_attention_bias,
      hparams,
      nonpadding=transformer.features_to_nonpadding(features, "targets"),
      losses=losses,
      **decode_kwargs)

  expected_attentions = features.get("expected_attentions")
  if expected_attentions is not None:
    attention_loss = common_attention.encoder_decoder_attention_loss(
        expected_attentions, self.attention_weights,
        hparams.expected_attention_loss_type,
        hparams.expected_attention_loss_multiplier)
    return decoder_output, {"attention_loss": attention_loss}

  ret = tf.reshape(decoder_output, targets_shape)
  if losses:
    return ret, losses
  else:
    return ret
def transformer_prepare_decoder(targets, hparams, features):
  # Thin wrapper around the library call; `hparams` is taken as an argument
  # here since it is not otherwise in scope.
  return transformer.transformer_prepare_decoder(targets, hparams, features)
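# Hedged usage sketch for the wrapper above. In tensor2tensor, when
# `features` carries packed-dataset entries such as "targets_segmentation"
# and "targets_position", transformer_prepare_decoder uses them when building
# the self-attention bias; passing features=None falls back to the plain
# causal bias, as shown here with dummy tensors.
import tensorflow as tf
from tensor2tensor.models import transformer

hparams = transformer.transformer_base()
targets = tf.zeros([2, 10, hparams.hidden_size])  # already flattened to 3D
decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
    targets, hparams, features=None)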