def predict(self, input):
    """Translate ``input`` into a target-side query string.

    The sentence is converted with ``variable_from_sentence`` (using
    ``self.lang1``), run through the encoder, and decoded with
    ``beam_search`` over ``self.lang2``. The decoded query is passed
    through ``fix_parentheses`` before it is returned.
    """
    source_variable = variable_from_sentence(self.lang1, input)

    # Encode the source, starting from the encoder's initial hidden state.
    initial_hidden = self.encoder.init_hidden()
    encoder_outputs, final_hidden = self.encoder(source_variable, initial_hidden)

    # Convert the encoder's final hidden state into the decoder's start state.
    decoder_hidden = self._hidden_encoder_to_decoder(final_hidden)

    # The leading 5 is presumably the beam width -- confirm against
    # beam_search's signature.
    return fix_parentheses(
        beam_search(5, self.decoder, decoder_hidden, encoder_outputs, self.lang2)
    )
def _translate(self, process_id, input_item, models, sess):
    """Run beam-search sampling for one input item and return the result.

    ``input_item`` supplies the beam size (``k``) and the source batch
    (``batch``). ``process_id`` is accepted but not used in this method.
    """
    # NOTE: input_item.max_ratio is deliberately not read here (the
    # original assignment was commented out).
    beam_size = input_item.k
    batch = input_item.batch

    # prepare_data also takes a target side; feed it an all-zero
    # placeholder with one row per source sentence.
    placeholder_targets = numpy.zeros(shape=(len(batch), 1))
    source, source_mask, _, _ = prepare_data(
        batch, placeholder_targets, maxlen=None)

    return inference.beam_search(models, sess, source, source_mask, beam_size)
def decode_at_test(self, enc_output, cross_attn_mask, batch_size, beam_size, do_sample):
    """Build the auto-regressive decoding graph used at test time.

    Decoding is conditioned on the encoder output. Beam search is used when
    ``beam_size > 0``; otherwise greedy search is used, with ``do_sample``
    forwarded to it.

    Returns:
        The ``(output_sequences, scores)`` pair produced by the selected
        search routine.
    """

    def _pre_process_targets(step_target_ids, current_time_step):
        """Embed the step's target ids and add the positional signal."""
        embedded = self._embed(step_target_ids)
        # Only the slice of the signal belonging to the current step is added.
        embedded += positional_signal[:, current_time_step - 1:current_time_step, :]
        if self.config.dropout_embeddings > 0:
            embedded = tf.layers.dropout(
                embedded,
                rate=self.config.dropout_embeddings,
                training=self.training)
        return embedded

    def _decode_step(target_embeddings, memories):
        """Run the embeddings through the decoder stack for one step.

        Returns the stack output at the final time-step together with the
        updated per-layer memories.
        """
        hidden = target_embeddings
        # NOTE: no self-attention mask is passed at decoding, as future
        # information is unavailable.
        for layer_id in range(1, self.config.num_decoder_layers + 1):
            layer = self.decoder_stack[layer_id]
            memory_key = 'layer_{:d}'.format(layer_id)
            hidden, memories[memory_key] = layer['self_attn'].forward(
                hidden, None, None, memories[memory_key])
            hidden, _ = layer['cross_attn'].forward(
                hidden, enc_output, cross_attn_mask)
            hidden = layer['ffn'].forward(hidden)
        # Keep only the final time-step, to be consistent with the
        # inference pipeline.
        return hidden[:, -1, :], memories

    def _decoding_function(step_target_ids, current_time_step, memories):
        """Produce the next-step logits and the updated memories."""
        embedded = _pre_process_targets(step_target_ids, current_time_step)
        last_step_output, memories = _decode_step(embedded, memories)
        step_logits = self.softmax_projection_layer.project(last_step_output)
        return step_logits, memories

    with tf.variable_scope(self.name):
        # Create the graph nodes before any decoding ops are added.
        self._build_graph()
        positional_signal = get_positional_signal(
            self.config.translation_max_len,
            self.config.embedding_size,
            self.float_dtype)

        if beam_size > 0:
            # Every hypothesis starts from the <GO> id (1); shape [batch_size].
            go_ids = tf.fill([batch_size], 1)
            initial_ids = tf.cast(go_ids, dtype=self.int_dtype)
            initial_memories = self._get_initial_memories(
                batch_size, beam_size=beam_size)
            vocab_size = self.embedding_layer.get_vocab_size()
            # The literal 0 is presumably the end-of-sequence id -- confirm
            # against beam_search's signature.
            output_sequences, scores = beam_search(
                _decoding_function,
                initial_ids,
                initial_memories,
                self.int_dtype,
                self.float_dtype,
                self.config.translation_max_len,
                batch_size,
                beam_size,
                vocab_size,
                0,
                self.config.length_normalization_alpha)
        else:
            # Greedy search starts from <GO> ids (1) shaped [batch_size, 1].
            go_ids = tf.fill([batch_size, 1], 1)
            initial_ids = tf.cast(go_ids, dtype=self.int_dtype)
            initial_memories = self._get_initial_memories(
                batch_size, beam_size=1)
            # The literal 0 is presumably the end-of-sequence id -- confirm
            # against greedy_search's signature.
            output_sequences, scores = greedy_search(
                _decoding_function,
                initial_ids,
                initial_memories,
                self.int_dtype,
                self.float_dtype,
                self.config.translation_max_len,
                batch_size,
                0,
                do_sample,
                time_major=False)

    return output_sequences, scores