def encoder_projection(self) -> EncoderProjection:
    """Resolve which encoder-projection function this decoder uses.

    An explicitly configured projection always wins. Otherwise the
    choice falls back on the decoder configuration: with no encoders
    at all, the initial state is empty; with encoders but no explicit
    ``rnn_size``, the encoded states are concatenated; with both, a
    dropout-regularized linear projection is used.

    Returns:
        The projection callable that builds the initial decoder state.
    """
    # Explicit user-supplied projection takes precedence.
    if self._encoder_projection is not None:
        return self._encoder_projection

    # No encoders at all: start decoding from an empty state.
    if not self.encoders:
        log("No direct encoder input. Using empty initial state")
        return empty_initial_state

    # Encoders present but no fixed RNN size: concatenate their states.
    if self._rnn_size is None:
        log("No rnn_size or encoder_projection: Using concatenation of "
            "encoded states")
        return concat_encoder_projection

    # Encoders and a fixed RNN size: project linearly (with dropout).
    log("Using linear projection of encoders as the initial state")
    return linear_encoder_projection(self.dropout_keep_prob)
def encoder_projection(self) -> EncoderProjection:
    """Select the encoder projection used to build the initial state.

    Preference order: an explicitly provided projection, then (when
    there are no encoders) the empty initial state, then (when no
    ``rnn_size`` was configured) concatenation of encoded states, and
    finally a linear projection with dropout.

    Returns:
        The chosen encoder-projection callable.
    """
    if self._encoder_projection is not None:
        # A projection was configured explicitly; use it verbatim.
        chosen = self._encoder_projection
    elif not self.encoders:
        log("No direct encoder input. Using empty initial state")
        chosen = empty_initial_state
    elif self._rnn_size is None:
        log("No rnn_size or encoder_projection: Using concatenation of "
            "encoded states")
        chosen = concat_encoder_projection
    else:
        log("Using linear projection of encoders as the initial state")
        chosen = linear_encoder_projection(self.dropout_keep_prob)
    return chosen
def __init__(self,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             embedding_size: int = None,
             embeddings_source: EmbeddedSequence = None,
             tie_embeddings: bool = False,
             label_smoothing: float = None,
             rnn_size: int = None,
             output_projection: OutputProjectionSpec = None,
             encoder_projection: EncoderProjection = None,
             attentions: List[BaseAttention] = None,
             attention_on_input: bool = False,
             rnn_cell: str = "GRU",
             conditional_gru: bool = False,
             supress_unk: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        name: Name of the decoder. Should be unique across all
            Neural Monkey objects.
        max_output_len: Maximum length of an output sequence.
        dropout_keep_prob: Probability of keeping a value during
            dropout.
        embedding_size: Size of embedding vectors for target words.
        embeddings_source: Embedded sequence to take embeddings from.
        tie_embeddings: Use decoder.embedding_matrix also in place of
            the output decoding matrix.
        label_smoothing: Label smoothing factor forwarded to the
            parent autoregressive decoder (None disables smoothing).
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        output_projection: How to generate distribution over
            vocabulary from decoder_outputs.
        encoder_projection: How to construct initial state from
            encoders.
        attentions: The attention objects to use. Optional.
        rnn_cell: RNN Cell used by the decoder (GRU or LSTM).
        conditional_gru: Flag whether to use the Conditional GRU
            architecture.
        attention_on_input: Flag whether attention from previous
            decoding step should be combined with the input in the
            next step.
        supress_unk: If true, decoder will not produce symbols for
            unknown tokens.
        reuse: Reuse the model variables from the given model part.
    """
    check_argument_types()
    # Delegate all generic autoregressive-decoder setup (embeddings,
    # label smoothing, checkpointing, ...) to the parent class.
    AutoregressiveDecoder.__init__(
        self,
        name=name,
        vocabulary=vocabulary,
        data_id=data_id,
        max_output_len=max_output_len,
        dropout_keep_prob=dropout_keep_prob,
        embedding_size=embedding_size,
        embeddings_source=embeddings_source,
        tie_embeddings=tie_embeddings,
        label_smoothing=label_smoothing,
        supress_unk=supress_unk,
        reuse=reuse,
        save_checkpoint=save_checkpoint,
        load_checkpoint=load_checkpoint,
        initializers=initializers)

    self.encoders = encoders
    self.output_projection_spec = output_projection
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    self.attentions = []  # type: List[BaseAttention]
    if attentions is not None:
        self.attentions = attentions

    if rnn_size is not None:
        self.rnn_size = rnn_size

    # Resolve the encoder projection (initial-state construction) and,
    # for the concatenation strategy, derive rnn_size from encoder
    # output widths.
    if encoder_projection is not None:
        self.encoder_projection = encoder_projection
    elif not self.encoders:
        log("No direct encoder input. Using empty initial state")
        self.encoder_projection = empty_initial_state
    elif rnn_size is None:
        log("No rnn_size or encoder_projection: Using concatenation of"
            " encoded states")
        self.encoder_projection = concat_encoder_projection
        self.rnn_size = sum(e.output.get_shape()[1].value
                            for e in encoders)
    else:
        log("Using linear projection of encoders as the initial state")
        self.encoder_projection = linear_encoder_projection(
            self.dropout_keep_prob)

    # NOTE(review): if rnn_size is None but encoder_projection was
    # given, self.rnn_size is never assigned in this method, so the
    # assert below would raise AttributeError rather than
    # AssertionError — presumably the parent class or a property
    # provides it. TODO confirm.
    assert self.rnn_size is not None

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                         "'NematusGRU'. Not {}".format(self._rnn_cell_str))

    # Resolve the output projection: default tanh projection, a
    # (function, size) tuple, or a bare function sized to rnn_size.
    if self.output_projection_spec is None:
        log("No output projection specified - using tanh projection")
        self.output_projection = nonlinear_output(
            self.rnn_size, tf.tanh)[0]
        self.output_projection_size = self.rnn_size
    elif isinstance(self.output_projection_spec, tuple):
        self.output_projection_spec = cast(
            Tuple[OutputProjection, int], self.output_projection_spec)
        (self.output_projection,
         self.output_projection_size) = self.output_projection_spec
    else:
        self.output_projection = cast(OutputProjection,
                                      self.output_projection_spec)
        self.output_projection_size = self.rnn_size

    # Choose how the decoder input is formed at each step: either the
    # embedded symbol combined with the previous attention, or the
    # plain embedded symbol.
    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    with self.use_scope():
        # Create (and remember) the variable scope used by the
        # decoding step; the body is built later.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

    self._variable_scope.set_initializer(
        tf.random_normal_initializer(stddev=0.001))

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))
    log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
def __init__(self,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             rnn_size: int = None,
             embedding_size: int = None,
             output_projection: OutputProjectionSpec = None,
             encoder_projection: EncoderProjection = None,
             attentions: List[BaseAttention] = None,
             embeddings_source: EmbeddedSequence = None,
             attention_on_input: bool = True,
             rnn_cell: str = "GRU",
             conditional_gru: bool = False,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder
        vocabulary: Target vocabulary
        data_id: Target data series
        name: Name of the decoder. Should be unique across all
            Neural Monkey objects
        max_output_len: Maximum length of an output sequence
        dropout_keep_prob: Probability of keeping a value during
            dropout

    Keyword arguments:
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        embedding_size: Size of embedding vectors for target words
        output_projection: How to generate distribution over
            vocabulary from decoder rnn_outputs
        encoder_projection: How to construct initial state from
            encoders
        attentions: The attention objects to use. Optional.
        embeddings_source: Embedded sequence to take embeddings from
        rnn_cell: RNN Cell used by the decoder (GRU or LSTM)
        conditional_gru: Flag whether to use the Conditional GRU
            architecture
        attention_on_input: Flag whether attention from previous
            decoding step should be combined with the input in the
            next step.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    log("Initializing decoder, name: '{}'".format(name))

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.output_projection_spec = output_projection
    self.encoder_projection = encoder_projection
    self.attentions = attentions
    self.embeddings_source = embeddings_source
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    if self.attentions is None:
        self.attentions = []

    # Exactly one source for the embedding dimensionality is needed:
    # either an explicit size or a sequence whose embeddings we reuse.
    if self.embedding_size is None and self.embeddings_source is None:
        raise ValueError("You must specify either embedding size or the "
                         "embedded sequence from which to reuse the "
                         "embeddings (e.g. set either 'embedding_size' or "
                         " 'embeddings_source' parameter)")

    if self.embeddings_source is not None:
        if self.embedding_size is not None:
            warn("Overriding the embedding_size parameter with the"
                 " size of the reused embeddings from the encoder.")
        # Reused embeddings dictate the embedding size.
        self.embedding_size = (
            self.embeddings_source.embedding_matrix.get_shape()[1].value)

    # Resolve the encoder projection (how the initial decoder state is
    # built); the concatenation strategy also fixes rnn_size from the
    # encoder output widths.
    if self.encoder_projection is None:
        if not self.encoders:
            log("No encoder - language model only.")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            self.rnn_size = sum(e.output.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

    # NOTE(review): a custom encoder_projection combined with
    # rnn_size=None leaves self.rnn_size as None and trips this
    # assert — callers supplying encoder_projection presumably must
    # also pass rnn_size. TODO confirm.
    assert self.rnn_size is not None

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                         "'NematusGRU'. Not {}".format(self._rnn_cell_str))

    # Resolve the output projection: default tanh projection, a
    # (function, size) tuple, or a bare function sized to rnn_size.
    if self.output_projection_spec is None:
        log("No output projection specified - using tanh projection")
        self.output_projection = nonlinear_output(
            self.rnn_size, tf.tanh)[0]
        self.output_projection_size = self.rnn_size
    elif isinstance(self.output_projection_spec, tuple):
        (self.output_projection,
         self.output_projection_size) = tuple(self.output_projection_spec)
    else:
        self.output_projection = self.output_projection_spec
        self.output_projection_size = self.rnn_size

    # Choose the per-step input function: embedded symbol plus
    # previous attention, or plain embedded symbol.
    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    with self.use_scope():
        # Create (and remember) the variable scope for the decoding
        # step; its contents are built later.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))
    log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
def __init__(self,
             encoders: List[Any],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             rnn_size: Optional[int] = None,
             embedding_size: Optional[int] = None,
             output_projection: Optional[Callable[
                 [tf.Tensor, tf.Tensor, List[tf.Tensor]],
                 tf.Tensor]]=None,
             encoder_projection: Optional[Callable[
                 [tf.Tensor, Optional[int], Optional[List[Any]]],
                 tf.Tensor]]=None,
             use_attention: bool = False,
             embeddings_encoder: Optional[Any] = None,
             attention_on_input: bool = True,
             rnn_cell: str = 'GRU',
             conditional_gru: bool = False,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder
        vocabulary: Target vocabulary
        data_id: Target data series
        name: Name of the decoder. Should be unique across all
            Neural Monkey objects
        max_output_len: Maximum length of an output sequence
        dropout_keep_prob: Probability of keeping a value during
            dropout

    Keyword arguments:
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        embedding_size: Size of embedding vectors for target words
        output_projection: How to generate distribution over
            vocabulary from decoder rnn_outputs
        encoder_projection: How to construct initial state from
            encoders
        use_attention: Flag whether to look at attention vectors of
            the encoders
        embeddings_encoder: Encoder to take embeddings from
        rnn_cell: RNN Cell used by the decoder (GRU or LSTM)
        conditional_gru: Flag whether to use the Conditional GRU
            architecture
        attention_on_input: Flag whether attention from previous
            decoding step should be combined with the input in the
            next step.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    log("Initializing decoder, name: '{}'".format(name))

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.output_projection = output_projection
    self.encoder_projection = encoder_projection
    self.use_attention = use_attention
    self.embeddings_encoder = embeddings_encoder
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    # Exactly one source for the embedding dimensionality is needed:
    # either an explicit size or an encoder whose embeddings we reuse.
    if self.embedding_size is None and self.embeddings_encoder is None:
        raise ValueError("You must specify either embedding size or the "
                         "encoder from which to reuse the embeddings ("
                         "e.g. set either 'embedding_size' or "
                         " 'embeddings_encoder' parameter)")

    if self.embeddings_encoder is not None:
        if self.embedding_size is not None:
            warn("Overriding the embedding_size parameter with the"
                 " size of the reused embeddings from the encoder.")
        # Reused embeddings dictate the embedding size.
        self.embedding_size = (
            self.embeddings_encoder.embedding_matrix.get_shape()[1].value)

    # Resolve the encoder projection (how the initial decoder state is
    # built); the concatenation strategy also fixes rnn_size from the
    # encoder output widths.
    if self.encoder_projection is None:
        if not self.encoders:
            log("No encoder - language model only.")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            self.rnn_size = sum(e.encoded.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

    # NOTE(review): a custom encoder_projection combined with
    # rnn_size=None leaves self.rnn_size as None and trips this
    # assert — presumably rnn_size is then required. TODO confirm.
    assert self.rnn_size is not None

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU' or 'LSTM'")

    if self.output_projection is None:
        log("No output projection specified - using simple concatenation")
        self.output_projection = no_deep_output

    with self.use_scope():
        # Create (and remember) the variable scope for the decoding
        # step; its contents are built later.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

        self._create_input_placeholders()
        self._create_training_placeholders()
        self._create_initial_state()
        self._create_embedding_matrix()

        with tf.name_scope("output_projection"):
            # Output layer mapping RNN states to vocabulary logits.
            self.decoding_w = tf.get_variable(
                "state_to_word_W",
                [self.rnn_size, len(self.vocabulary)],
                initializer=tf.random_uniform_initializer(-0.5, 0.5))

            # Bias initialized to -log(V), i.e. a uniform initial
            # output distribution.
            self.decoding_b = tf.get_variable(
                "state_to_word_b", [len(self.vocabulary)],
                initializer=tf.constant_initializer(
                    - math.log(len(self.vocabulary))))

        # The last training input is not fed to the decoding function
        # (it serves only as a target).
        embedded_train_inputs = self.embed_and_dropout(
            self.train_inputs[:-1])

        # Beware: no dropout is applied here.
        embedded_go_symbols = tf.nn.embedding_lookup(self.embedding_matrix,
                                                     self.go_symbols)

        # fetch train attention objects
        self._train_attention_objects = {}  # type: Dict[Attentive, tf.Tensor]
        if self.use_attention:
            with tf.name_scope("attention_object"):
                self._train_attention_objects = {
                    e: e.create_attention_object()
                    for e in self.encoders
                    if isinstance(e, Attentive)}

        # Build the training decoding graph (teacher forcing).
        self.train_logits, _, _ = self._decoding_loop(
            embedded_go_symbols,
            train_inputs=embedded_train_inputs,
            train_mode=True)

        # From here on, the runtime graph reuses the training
        # variables; the assert guards against double reuse.
        assert not tf.get_variable_scope().reuse
        tf.get_variable_scope().reuse_variables()

        # fetch runtime attention objects
        self._runtime_attention_objects = {}  # type: Dict[Attentive, tf.Tensor]
        if self.use_attention:
            self._runtime_attention_objects = {
                e: e.create_attention_object()
                for e in self.encoders
                if isinstance(e, Attentive)}

        # Build the runtime decoding graph (feeding back own outputs).
        (self.runtime_logits,
         self.runtime_rnn_states,
         self.runtime_mask) = self._decoding_loop(
             embedded_go_symbols,
             train_mode=False)

        train_targets = tf.transpose(self.train_inputs)

        # Per-sentence cross-entropies; their mean is the training
        # cost optimized by the trainer.
        self.train_xents = tf.contrib.seq2seq.sequence_loss(
            tf.stack(self.train_logits, 1), train_targets,
            tf.transpose(self.train_padding),
            average_across_batch=False)
        self.train_loss = tf.reduce_mean(self.train_xents)
        self.cost = self.train_loss

        self.train_logprobs = [tf.nn.log_softmax(l)
                               for l in self.train_logits]

        # Greedy decoding over non-PAD symbols: index 0 is skipped by
        # the [:, 1:] slice and compensated by the +1 shift.
        self.decoded = [tf.argmax(logit[:, 1:], 1) + 1
                        for logit in self.runtime_logits]

        self.runtime_loss = tf.contrib.seq2seq.sequence_loss(
            tf.stack(self.runtime_logits, 1), train_targets,
            tf.transpose(self.train_padding))

        self.runtime_logprobs = [tf.nn.log_softmax(l)
                                 for l in self.runtime_logits]

        self._visualize_attention()

    log("Decoder initalized.")
def __init__(self,
             encoders: List[Any],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             rnn_size: Optional[int] = None,
             embedding_size: Optional[int] = None,
             output_projection: Optional[
                 Callable[[tf.Tensor, tf.Tensor, List[tf.Tensor]],
                          tf.Tensor]] = None,
             encoder_projection: Optional[
                 Callable[[tf.Tensor, Optional[int], Optional[List[Any]]],
                          tf.Tensor]] = None,
             use_attention: bool = False,
             embeddings_source: Optional[EmbeddedSequence] = None,
             attention_on_input: bool = True,
             rnn_cell: str = 'GRU',
             conditional_gru: bool = False,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder
        vocabulary: Target vocabulary
        data_id: Target data series
        name: Name of the decoder. Should be unique across all
            Neural Monkey objects
        max_output_len: Maximum length of an output sequence
        dropout_keep_prob: Probability of keeping a value during
            dropout

    Keyword arguments:
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        embedding_size: Size of embedding vectors for target words
        output_projection: How to generate distribution over
            vocabulary from decoder rnn_outputs
        encoder_projection: How to construct initial state from
            encoders
        use_attention: Flag whether to look at attention vectors of
            the encoders
        embeddings_source: Embedded sequence to take embeddings from
        rnn_cell: RNN Cell used by the decoder (GRU or LSTM)
        conditional_gru: Flag whether to use the Conditional GRU
            architecture
        attention_on_input: Flag whether attention from previous
            decoding step should be combined with the input in the
            next step.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    log("Initializing decoder, name: '{}'".format(name))

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.output_projection = output_projection
    self.encoder_projection = encoder_projection
    self.use_attention = use_attention
    self.embeddings_source = embeddings_source
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    # Exactly one source for the embedding dimensionality is needed:
    # either an explicit size or a sequence whose embeddings we reuse.
    if self.embedding_size is None and self.embeddings_source is None:
        raise ValueError("You must specify either embedding size or the "
                         "embedded sequence from which to reuse the "
                         "embeddings (e.g. set either 'embedding_size' or "
                         " 'embeddings_source' parameter)")

    if self.embeddings_source is not None:
        if self.embedding_size is not None:
            warn("Overriding the embedding_size parameter with the"
                 " size of the reused embeddings from the encoder.")
        # Reused embeddings dictate the embedding size.
        self.embedding_size = (
            self.embeddings_source.embedding_matrix.get_shape()[1].value)

    # Resolve the encoder projection (how the initial decoder state is
    # built); the concatenation strategy also fixes rnn_size from the
    # encoder output widths.
    if self.encoder_projection is None:
        if not self.encoders:
            log("No encoder - language model only.")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            self.rnn_size = sum(e.encoded.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

    # NOTE(review): a custom encoder_projection combined with
    # rnn_size=None leaves self.rnn_size as None and trips this
    # assert — presumably rnn_size is then required. TODO confirm.
    assert self.rnn_size is not None

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU' or 'LSTM'")

    if self.output_projection is None:
        log("No output projection specified - using simple concatenation")
        self.output_projection = no_deep_output

    # Choose the per-step input function: embedded symbol plus
    # previous attention, or plain embedded symbol.
    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    with self.use_scope():
        # Create (and remember) the variable scope for the decoding
        # step; its contents are built later.
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

        # fetch train attention objects
        self._train_attention_objects = {}  # type: Dict[Attentive, tf.Tensor]
        if self.use_attention:
            with tf.name_scope("attention_object"):
                self._train_attention_objects = {
                    e: e.create_attention_object()
                    for e in self.encoders
                    if isinstance(e, Attentive)
                }

        # Runtime attention objects reuse the training variables; the
        # assert guards against double reuse.
        assert not tf.get_variable_scope().reuse
        tf.get_variable_scope().reuse_variables()

        # fetch runtime attention objects
        self._runtime_attention_objects = {}  # type: Dict[Attentive, tf.Tensor]
        if self.use_attention:
            self._runtime_attention_objects = {
                e: e.create_attention_object()
                for e in self.encoders
                if isinstance(e, Attentive)
            }

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))