def output_projection_spec(self) -> Tuple[OutputProjection, int]:
    """Resolve the stored output projection into a (projection, size) pair.

    Returns:
        The stored spec itself when it already is a tuple. When a bare
        projection is stored, that projection paired with `self.rnn_size`.
        When nothing is stored, the first element of
        `nonlinear_output(self.rnn_size, tf.tanh)` paired with
        `self.rnn_size`.
    """
    spec = self._output_projection_spec

    # A tuple already carries its own output size.
    if isinstance(spec, tuple):
        return spec

    # A bare projection is paired with the RNN state size.
    if spec is not None:
        return cast(OutputProjection, spec), self.rnn_size

    log("No output projection specified - using tanh projection")
    return (nonlinear_output(self.rnn_size, tf.tanh)[0], self.rnn_size)
def __init__(self,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             embedding_size: int = None,
             embeddings_source: EmbeddedSequence = None,
             tie_embeddings: bool = False,
             label_smoothing: float = None,
             rnn_size: int = None,
             output_projection: OutputProjectionSpec = None,
             encoder_projection: EncoderProjection = None,
             attentions: List[BaseAttention] = None,
             attention_on_input: bool = False,
             rnn_cell: str = "GRU",
             conditional_gru: bool = False,
             supress_unk: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        max_output_len: Maximum length of an output sequence.
        dropout_keep_prob: Probability of keeping a value during dropout.
        embedding_size: Size of embedding vectors for target words.
        embeddings_source: Embedded sequence to take embeddings from.
        tie_embeddings: Use decoder.embedding_matrix also in place of the
            output decoding matrix.
        label_smoothing: Forwarded unchanged to
            `AutoregressiveDecoder.__init__`.
        rnn_size: Size of the decoder hidden state, if None set according
            to encoders.
        output_projection: How to generate distribution over vocabulary
            from decoder_outputs. Either a projection, or a tuple of
            (projection, output size). If None, a tanh projection obtained
            from `nonlinear_output` is used.
        encoder_projection: How to construct initial state from encoders.
            If None, it is selected automatically: an empty initial state
            when there are no encoders, concatenation of the encoder
            states when `rnn_size` is None, and a linear projection of the
            encoders otherwise.
        attentions: The attention objects to use. Optional.
        attention_on_input: Flag whether attention from previous decoding
            step should be combined with the input in the next step.
        rnn_cell: RNN Cell used by the decoder; must be one of
            `RNN_CELL_TYPES`.
        conditional_gru: Flag whether to use the Conditional GRU
            architecture.
        supress_unk: If true, decoder will not produce symbols for unknown
            tokens.
        reuse: Reuse the model variables from the given model part.
        save_checkpoint: Forwarded unchanged to
            `AutoregressiveDecoder.__init__`.
        load_checkpoint: Forwarded unchanged to
            `AutoregressiveDecoder.__init__`.
        initializers: Forwarded unchanged to
            `AutoregressiveDecoder.__init__`.

    Raises:
        ValueError: If the hidden state size can neither be taken from
            `rnn_size` nor inferred from the encoders, or if `rnn_cell`
            is not listed in `RNN_CELL_TYPES`.
    """
    check_argument_types()
    AutoregressiveDecoder.__init__(
        self,
        name=name,
        vocabulary=vocabulary,
        data_id=data_id,
        max_output_len=max_output_len,
        dropout_keep_prob=dropout_keep_prob,
        embedding_size=embedding_size,
        embeddings_source=embeddings_source,
        tie_embeddings=tie_embeddings,
        label_smoothing=label_smoothing,
        supress_unk=supress_unk,
        reuse=reuse,
        save_checkpoint=save_checkpoint,
        load_checkpoint=load_checkpoint,
        initializers=initializers)

    self.encoders = encoders
    self.output_projection_spec = output_projection
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    self.attentions = []  # type: List[BaseAttention]
    if attentions is not None:
        self.attentions = attentions

    # Assigned only when given; the concatenation branch below may set it
    # from the encoders instead.
    if rnn_size is not None:
        self.rnn_size = rnn_size

    if encoder_projection is not None:
        self.encoder_projection = encoder_projection
    elif not self.encoders:
        log("No direct encoder input. Using empty initial state")
        self.encoder_projection = empty_initial_state
    elif rnn_size is None:
        log("No rnn_size or encoder_projection: Using concatenation of"
            " encoded states")
        self.encoder_projection = concat_encoder_projection
        self.rnn_size = sum(e.output.get_shape()[1].value
                            for e in encoders)
    else:
        log("Using linear projection of encoders as the initial state")
        self.encoder_projection = linear_encoder_projection(
            self.dropout_keep_prob)

    # With rnn_size=None and either an explicit encoder_projection or no
    # encoders, nothing above assigns self.rnn_size. Fail with a clear
    # message instead of relying on an assert (which is stripped under -O
    # and would otherwise surface as an AttributeError).
    if getattr(self, "rnn_size", None) is None:
        raise ValueError(
            "Cannot determine the decoder state size: 'rnn_size' must be "
            "set when 'encoder_projection' is given or when there are no "
            "encoders.")

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                         "'NematusGRU'. Not {}".format(self._rnn_cell_str))

    if self.output_projection_spec is None:
        log("No output projection specified - using tanh projection")
        self.output_projection = nonlinear_output(
            self.rnn_size, tf.tanh)[0]
        self.output_projection_size = self.rnn_size
    elif isinstance(self.output_projection_spec, tuple):
        # The tuple form carries its own output size.
        self.output_projection_spec = cast(
            Tuple[OutputProjection, int], self.output_projection_spec)
        (self.output_projection,
         self.output_projection_size) = self.output_projection_spec
    else:
        self.output_projection = cast(
            OutputProjection, self.output_projection_spec)
        self.output_projection_size = self.rnn_size

    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    with self.use_scope():
        # Entered only to capture the scope object in self.step_scope.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

        self._variable_scope.set_initializer(
            tf.random_normal_initializer(stddev=0.001))

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))
    log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
def __init__(self,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             rnn_size: int = None,
             embedding_size: int = None,
             output_projection: OutputProjectionSpec = None,
             encoder_projection: EncoderProjection = None,
             attentions: List[BaseAttention] = None,
             embeddings_source: EmbeddedSequence = None,
             attention_on_input: bool = True,
             rnn_cell: str = "GRU",
             conditional_gru: bool = False,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder
        vocabulary: Target vocabulary
        data_id: Target data series
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects
        max_output_len: Maximum length of an output sequence
        dropout_keep_prob: Probability of keeping a value during dropout

    Keyword arguments:
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        embedding_size: Size of embedding vectors for target words.
            Overridden by the width of the reused embedding matrix when
            `embeddings_source` is given.
        output_projection: How to generate distribution over vocabulary
            from decoder rnn_outputs. Either a projection, or a tuple of
            (projection, output size). If None, a tanh projection obtained
            from `nonlinear_output` is used.
        encoder_projection: How to construct initial state from encoders.
            If None, it is selected automatically: an empty initial state
            when there are no encoders, concatenation of the encoder
            states when `rnn_size` is None, and a linear projection of the
            encoders otherwise.
        attentions: The attention objects to use. Optional.
        embeddings_source: Embedded sequence to take embeddings from
        rnn_cell: RNN Cell used by the decoder; must be one of
            `RNN_CELL_TYPES`.
        conditional_gru: Flag whether to use the Conditional GRU
            architecture
        attention_on_input: Flag whether attention from previous decoding
            step should be combined with the input in the next step.
        save_checkpoint: Passed on to `ModelPart.__init__`.
        load_checkpoint: Passed on to `ModelPart.__init__`.

    Raises:
        ValueError: If neither `embedding_size` nor `embeddings_source`
            is given, if the hidden state size can neither be taken from
            `rnn_size` nor inferred from the encoders, or if `rnn_cell`
            is not listed in `RNN_CELL_TYPES`.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    log("Initializing decoder, name: '{}'".format(name))

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.output_projection_spec = output_projection
    self.encoder_projection = encoder_projection
    self.attentions = attentions
    self.embeddings_source = embeddings_source
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    if self.attentions is None:
        self.attentions = []

    if self.embedding_size is None and self.embeddings_source is None:
        raise ValueError("You must specify either embedding size or the "
                         "embedded sequence from which to reuse the "
                         "embeddings (e.g. set either 'embedding_size' or "
                         " 'embeddings_source' parameter)")

    if self.embeddings_source is not None:
        if self.embedding_size is not None:
            warn("Overriding the embedding_size parameter with the"
                 " size of the reused embeddings from the encoder.")

        # The reused matrix dictates the embedding width.
        self.embedding_size = (
            self.embeddings_source.embedding_matrix.get_shape()[1].value)

    if self.encoder_projection is None:
        if not self.encoders:
            log("No encoder - language model only.")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            self.rnn_size = sum(e.output.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

    # rnn_size stays None when it was not given and could not be inferred
    # (explicit encoder_projection, or no encoders). Validate explicitly
    # rather than with an assert, which is stripped under -O.
    if self.rnn_size is None:
        raise ValueError(
            "Cannot determine the decoder state size: 'rnn_size' must be "
            "set when 'encoder_projection' is given or when there are no "
            "encoders.")

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                         "'NematusGRU'. Not {}".format(self._rnn_cell_str))

    if self.output_projection_spec is None:
        log("No output projection specified - using tanh projection")
        self.output_projection = nonlinear_output(
            self.rnn_size, tf.tanh)[0]
        self.output_projection_size = self.rnn_size
    elif isinstance(self.output_projection_spec, tuple):
        # The tuple form carries its own output size; it unpacks directly,
        # no intermediate copy is needed.
        (self.output_projection,
         self.output_projection_size) = self.output_projection_spec
    else:
        self.output_projection = self.output_projection_spec
        self.output_projection_size = self.rnn_size

    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    with self.use_scope():
        # Entered only to capture the scope object in self.step_scope.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))
    log("Runtime logits tensor: {}".format(str(self.runtime_logits)))