def __init__(self, name: str, cnn: CNNEncoder) -> None:
    check_argument_types()
    ModelPart.__init__(
        self, name, save_checkpoint=None, load_checkpoint=None)

    self._cnn = cnn
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             embedding_size: int = None,
             embeddings_source: EmbeddedSequence = None,
             tie_embeddings: bool = False,
             label_smoothing: float = None,
             supress_unk: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize parameters common for all autoregressive decoders.

    Arguments:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        max_output_len: Maximum length of an output sequence.
        reuse: Reuse the variables from the model part.
        dropout_keep_prob: Probability of keeping a value during dropout.
        embedding_size: Size of embedding vectors for target words.
        embeddings_source: Embedded sequence to take embeddings from.
        tie_embeddings: Use decoder.embedding_matrix also in place of the
            output decoding matrix.
        label_smoothing: Label smoothing parameter.
        supress_unk: If True, the decoder will not produce symbols for
            unknown tokens.
    """
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self._embedding_size = embedding_size
    self.embeddings_source = embeddings_source
    self.label_smoothing = label_smoothing
    self.tie_embeddings = tie_embeddings
    self.supress_unk = supress_unk

    self.encoder_states = lambda: []  # type: Callable[[], List[tf.Tensor]]
    self.encoder_masks = lambda: []  # type: Callable[[], List[tf.Tensor]]

    # Check the values of the parameters (max_output_len, ...)
    if self.max_output_len <= 0:
        raise ValueError(
            "Maximum sequence length must be a positive integer.")

    if self._embedding_size is not None and self._embedding_size <= 0:
        raise ValueError("Embedding size must be a positive integer.")

    if self.dropout_keep_prob < 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep probability must be a real number "
                         "in the interval [0,1].")
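# A minimal, self-contained sketch (not part of the source) of what the
# label_smoothing parameter above does to the training targets in one
# common formulation: each one-hot target is mixed with a uniform
# distribution over the vocabulary. The helper below is illustrative
# only; the decoder's actual implementation lives elsewhere.
import numpy as np

def smooth_one_hot(token_id: int, vocab_size: int,
                   label_smoothing: float) -> np.ndarray:
    """Return a smoothed target distribution for one token."""
    smoothed = np.full(vocab_size, label_smoothing / vocab_size)
    smoothed[token_id] += 1.0 - label_smoothing
    return smoothed

# e.g. smooth_one_hot(2, 5, 0.1) -> [0.02, 0.02, 0.92, 0.02, 0.02]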
def __init__(self,
             encoder: SentenceEncoder,
             decoder: Decoder,
             data_id: str,
             name: str) -> None:
    ModelPart.__init__(self, name, None, None)

    self.encoder = encoder
    self.decoder = decoder
    self.data_id = data_id

    self.ref_alignment = tf.placeholder(
        tf.float32,
        [None, self.decoder.max_output_len, self.encoder.max_input_len],
        name="ref_alignment")

    # shape will be [max_output_len, batch_size, max_input_len]
    self.alignment_target = tf.transpose(self.ref_alignment,
                                         perm=[1, 0, 2])

    _, self.train_loss = self._make_decoder(runtime_mode=False)
    self.decoded, self.runtime_loss = self._make_decoder(runtime_mode=True)

    tf.summary.scalar("alignment_train_xent", self.train_loss,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             encoder: TemporalStateful,
             vocabulary: Vocabulary,
             data_id: str,
             max_length: int = None,
             merge_repeated_targets: bool = False,
             merge_repeated_outputs: bool = True,
             beam_width: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_length = max_length
    self.merge_repeated_targets = merge_repeated_targets
    self.merge_repeated_outputs = merge_repeated_outputs
    self.beam_width = beam_width
def __init__(self,
             name: str,
             parent_decoder: AutoregressiveDecoder,
             beam_size: int,
             max_steps: int,
             length_normalization: float) -> None:
    """Construct the beam search decoder graph.

    Arguments:
        name: The name for the model part.
        parent_decoder: An autoregressive decoder from which to sample.
        beam_size: The number of hypotheses in the beam.
        max_steps: The maximum number of time steps to perform.
        length_normalization: The alpha parameter from Eq. 14 in the paper.
    """
    check_argument_types()
    ModelPart.__init__(self, name)

    self.parent_decoder = parent_decoder
    self.beam_size = beam_size
    self.length_normalization = length_normalization
    self.max_steps_int = max_steps

    # Create a placeholder for the maximum number of steps. This is
    # necessary during ensembling, when the decoder is called repeatedly
    # with the max_steps attribute set to one.
    self.max_steps = tf.placeholder_with_default(self.max_steps_int, [])

    self._initial_loop_state = None  # type: Optional[BeamSearchLoopState]
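# The length_normalization argument is the exponent alpha of a length
# penalty; "Eq. 14 in the paper" presumably refers to Wu et al. (2016),
# where the penalty is lp(Y) = ((5 + |Y|) / 6) ** alpha and hypotheses
# are ranked by log-probability divided by lp(Y). A runnable sketch of
# that scoring rule (an assumption about the exact formula used here):

def length_penalty(length: int, alpha: float) -> float:
    return ((5.0 + length) / 6.0) ** alpha

def normalized_score(log_prob: float, length: int, alpha: float) -> float:
    return log_prob / length_penalty(length, alpha)

# With alpha = 0 the penalty is 1.0 and scoring falls back to raw
# log-probabilities; larger alpha favors longer hypotheses.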
def __init__(self,
             name: str,
             input_shape: List[int],
             output_shape: int,
             data_id: str,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    assert len(input_shape) == 3
    if output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    self.data_id = data_id

    with self.use_scope():
        features_shape = [None] + input_shape  # type: ignore
        self.image_features = tf.placeholder(tf.float32,
                                             shape=features_shape,
                                             name="image_input")

        self.flat = tf.reduce_mean(self.image_features,
                                   axis=[1, 2],
                                   name="average_image")

        self.project_w = tf.get_variable(
            name="img_init_proj_W",
            shape=[input_shape[2], output_shape],
            initializer=tf.glorot_normal_initializer())
        self.project_b = tf.get_variable(
            name="img_init_b", shape=[output_shape],
            initializer=tf.zeros_initializer())
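# A numpy sketch (illustrative only; dimensions are made-up example
# values) of the two operations built above: tf.reduce_mean over axes
# [1, 2] collapses the spatial grid of CNN features into one vector per
# image, which is then linearly projected to output_shape dimensions.
import numpy as np

batch, height, width, channels, output_shape = 2, 7, 7, 2048, 512
features = np.random.rand(batch, height, width, channels)

flat = features.mean(axis=(1, 2))                # [batch, channels]
proj_w = np.random.rand(channels, output_shape)  # img_init_proj_W
proj_b = np.zeros(output_shape)                  # img_init_b
encoded = flat @ proj_w + proj_b                 # [batch, output_shape]
assert encoded.shape == (batch, output_shape)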
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate StatefulFiller.

    Arguments:
        name: Name of the model part.
        dimension: Dimensionality of the input.
        data_id: Series containing the numpy objects.
        output_shape: Dimension of optional state projection.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.data_id = data_id
    self.dimension = dimension
    self.output_shape = output_shape

    if self.dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if self.output_shape is not None and self.output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    with self.use_scope():
        self.vector = tf.placeholder(
            tf.float32, [None, self.dimension], "input_vector")
def __init__(self,
             name: str,
             input_sequence: Attendable,
             hidden_size: int,
             num_heads: int,
             output_size: int = None,
             state_proj_size: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize an instance of the encoder."""
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
    self.hidden_size = hidden_size
    self.num_heads = num_heads
    self.output_size = output_size
    self.state_proj_size = state_proj_size
    self.dropout_keep_prob = dropout_keep_prob

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")
def __init__(self,
             name: str,
             encoders: List[Stateful],
             data_id: str,
             layers: List[int] = None,
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             dimension: int = 1,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)
    assert check_argument_types()

    self.encoders = encoders
    self.data_id = data_id
    self.max_output_len = 1
    self.dimension = dimension

    self._layers = layers
    self._activation_fn = activation_fn
    self._dropout_keep_prob = dropout_keep_prob

    tf.summary.scalar("val_optimization_cost", self.cost,
                      collections=["summary_val"])
    tf.summary.scalar("train_optimization_cost", self.cost,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             encoders: List[TemporalStateful],
             vocabulary: Vocabulary,
             data_id: str,
             max_output_len: int = None,
             hidden_dim: int = None,
             activation: Callable = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             add_start_symbol: bool = False,
             add_end_symbol: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.hidden_dim = hidden_dim
    self.activation = activation
    self.dropout_keep_prob = dropout_keep_prob
    self.add_start_symbol = add_start_symbol
    self.add_end_symbol = add_end_symbol
def __init__(self,
             encoder: RecurrentEncoder,
             decoder: Decoder,
             data_id: str,
             name: str,
             reuse: ModelPart = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, None, None, initializers)

    self.encoder = encoder
    self.decoder = decoder
    self.data_id = data_id

    if not isinstance(self.encoder.input_sequence, Sequence):
        raise TypeError("Expected Sequence type in encoder.input_sequence")

    self.enc_input = cast(Sequence, self.encoder.input_sequence)

    # TODO this is here to call the lazy properties which create
    # the list of attention distributions
    # pylint: disable=pointless-statement
    self.decoder.runtime_logits
    self.decoder.train_logits
    # pylint: enable=pointless-statement

    _, self.train_loss = self._make_decoder(runtime_mode=False)
    self.decoded, self.runtime_loss = self._make_decoder(runtime_mode=True)

    tf.summary.scalar("alignment_train_xent", self.train_loss,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             input_sequence: EmbeddedSequence,
             conv_features: int,
             encoder_layers: int,
             kernel_width: int = 5,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
    self.encoder_layers = encoder_layers
    self.conv_features = conv_features
    self.kernel_width = kernel_width
    self.dropout_keep_prob = dropout_keep_prob

    if conv_features <= 0:
        raise ValueError("Number of features must be a positive integer.")
    if encoder_layers <= 0:
        raise ValueError(
            "Number of encoder layers must be a positive integer.")

    if self.input_sequence.max_length is None:
        raise ValueError("Input sequence must have a maximum length for "
                         "positional embeddings with this encoder")
    self.max_input_length = self.input_sequence.max_length

    log("Initializing convolutional seq2seq encoder, name {}".format(
        self.name))
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    if dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if output_shape is not None and output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    self.vector = tf.placeholder(tf.float32, shape=[None, dimension])
    self.data_id = data_id

    with self.use_scope():
        if output_shape is not None and dimension != output_shape:
            project_w = tf.get_variable(
                shape=[dimension, output_shape],
                name="img_init_proj_W")
            project_b = tf.get_variable(
                name="img_init_b", shape=[output_shape],
                initializer=tf.zeros_initializer())

            self._encoded = tf.matmul(self.vector, project_w) + project_b
        else:
            self._encoded = self.vector
def __init__(
        self,
        name: str,
        parent: TemporalStateful,
        factor: int,
        projection_size: int = None,
        projection_activation: Activation = None) -> None:
    """Initialize SentenceSplitter.

    Args:
        parent: TemporalStateful whose states will be split.
        factor: Factor by which the states will be split - the resulting
            sequence will be longer by this factor.
        projection_size: If not None, specifies dimensionality of a
            projection before state splitting.
        projection_activation: Non-linearity function for the optional
            projection.
    """
    check_argument_types()

    ModelPart.__init__(
        self, name=name, save_checkpoint=None, load_checkpoint=None,
        initializers=None)

    self.parent = parent
    self.factor = factor
    self.projection_size = projection_size
    self.activation = projection_activation

    if projection_size is not None and projection_size % factor != 0:
        raise ValueError((
            "Dimension of projection ({}) must be "
            "divisible by the given factor ({}).").format(
                projection_size, factor))
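# A numpy sketch (illustrative, not the actual implementation) of the
# splitting operation the factor controls: a state sequence of shape
# [batch, time, dim] becomes [batch, time * factor, dim // factor],
# which is why the (projected) dimension must be divisible by factor.
import numpy as np

batch, time, dim, factor = 2, 5, 12, 3
states = np.random.rand(batch, time, dim)
split = states.reshape(batch, time * factor, dim // factor)
assert split.shape == (2, 15, 4)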
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate StatefulFiller.

    Arguments:
        name: Name of the model part.
        dimension: Dimensionality of the input.
        data_id: Series containing the numpy objects.
        output_shape: Dimension of optional state projection.
    """
    check_argument_types()
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.data_id = data_id
    self.dimension = dimension
    self.output_shape = output_shape

    if self.dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if self.output_shape is not None and self.output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")
def __init__(self,
             name: str,
             input_shape: List[int],
             data_id: str,
             projection_dim: int = None,
             ff_hidden_dim: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate SpatialFiller.

    Args:
        name: Name of the model part.
        input_shape: Dimensionality of the input.
        data_id: Name of the data series with numpy objects.
        projection_dim: Optional, dimension of the states projection.
    """
    check_argument_types()
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.data_id = data_id
    self.input_shape = input_shape
    self.projection_dim = projection_dim
    self.ff_hidden_dim = ff_hidden_dim

    if self.ff_hidden_dim is not None and self.projection_dim is None:
        raise ValueError(
            "projection_dim must be provided when using ff_hidden_dim")

    if len(self.input_shape) != 3:
        raise ValueError("The input shape should have 3 dimensions.")
def __init__(self,
             name: str,
             encoder: Union[RecurrentEncoder, SentenceEncoder],
             vocabulary: Vocabulary,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    self.rnn_size = int(self.encoder.temporal_states.get_shape()[-1])

    with self.use_scope():
        self.train_targets = tf.placeholder(
            tf.int32, [None, None], "labeler_targets")
        self.train_weights = tf.placeholder(
            tf.float32, [None, None], "labeler_padding_weights")
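# The labeler_padding_weights placeholder above typically carries a 0/1
# mask that zeroes out loss contributions from padded positions. A small
# sketch (an assumption about how the feed values are built) for a batch
# of sequences padded to a common length:
import numpy as np

def padding_weights(lengths, max_len):
    """Return a [batch, max_len] float mask: 1 inside, 0 in padding."""
    positions = np.arange(max_len)[None, :]
    return (positions < np.asarray(lengths)[:, None]).astype(np.float32)

# padding_weights([3, 1], 4) -> [[1, 1, 1, 0], [1, 0, 0, 0]]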
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.query_state_size = None  # type: tf.Tensor
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             rnn_layers: List[RNNSpecTuple],
             add_residual: bool = False,
             add_layer_norm: bool = False,
             include_final_layer_norm: bool = True,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of a recurrent encoder.

    Arguments:
        name: ModelPart name.
        input_sequence: The input sequence for the encoder.
        rnn_layers: A list of RNN layer specifications. Each layer is
            given by its size (the dimension of the RNN hidden state
            vector), the cell type (one of "GRU", "NematusGRU", "LSTM"),
            and the direction (one of "forward", "backward",
            "bidirectional"), i.e. in what order to process the input
            sequence. Note that choosing "bidirectional" will double the
            resulting vector dimension as well as the number of encoder
            parameters.
        add_residual: Add residual connections to the RNN layer output.
        add_layer_norm: Add layer normalization after each RNN layer.
        include_final_layer_norm: Normalize also the output of the
            network.
        dropout_keep_prob: 1 - dropout probability.
        save_checkpoint: ModelPart save checkpoint file.
        load_checkpoint: ModelPart load checkpoint file.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)
    TemporalStatefulWithOutput.__init__(self)

    self.input_sequence = input_sequence
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_specs = [_make_rnn_spec(*r) for r in rnn_layers]
    self.add_residual = add_residual
    self.add_layer_norm = add_layer_norm
    self.include_final_layer_norm = include_final_layer_norm

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    layer_sizes = [
        2 * layer.size if layer.direction == "bidirectional"
        else layer.size
        for layer in self.rnn_specs]

    if add_residual and len(set(layer_sizes)) > 1:
        raise ValueError(
            "When using residual connections, all layers must have "
            "the same size, but are {}.".format(layer_sizes))

    self._variable_scope.set_initializer(
        tf.random_normal_initializer(stddev=0.001))
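# A runnable sketch of the residual-size check above: bidirectional
# layers concatenate forward and backward states, so their effective
# output size is doubled. The RNNSpec namedtuple here is a stand-in for
# the real spec type, assumed only to expose the .size and .direction
# fields used in the constructor.
from collections import namedtuple

RNNSpec = namedtuple("RNNSpec", ["size", "direction"])

specs = [RNNSpec(300, "bidirectional"), RNNSpec(600, "forward")]
layer_sizes = [2 * s.size if s.direction == "bidirectional" else s.size
               for s in specs]
# layer_sizes == [600, 600]: residual connections between these layers
# are legal because all effective sizes match.
assert len(set(layer_sizes)) == 1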
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.query_state_size = None  # type: tf.Tensor
    self._histories = {}  # type: Dict[str, tf.Tensor]

    self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             ff_hidden_size: int,
             depth: int,
             n_heads: int,
             dropout_keep_prob: float = 1.0,
             attention_dropout_keep_prob: float = 1.0,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create an encoder of the Transformer model.

    Described in Vaswani et al. (2017), arxiv.org/abs/1706.03762

    Arguments:
        input_sequence: Embedded input sequence.
        name: Name of the encoder. Should be unique across all Neural
            Monkey objects.
        dropout_keep_prob: Probability of keeping a value during dropout.

    Keyword arguments:
        ff_hidden_size: Size of the feedforward sublayers.
        n_heads: Number of the self-attention heads.
        depth: Number of sublayers.
        attention_dropout_keep_prob: Probability of keeping a value
            during dropout on the attention output.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.input_sequence = input_sequence
    self.model_dimension = self.input_sequence.dimension
    self.ff_hidden_size = ff_hidden_size
    self.depth = depth
    self.n_heads = n_heads
    self.dropout_keep_prob = dropout_keep_prob
    self.attention_dropout_keep_prob = attention_dropout_keep_prob

    if self.depth <= 0:
        raise ValueError("Depth must be a positive integer.")

    if self.ff_hidden_size <= 0:
        raise ValueError("Feed forward hidden size must be a "
                         "positive integer.")

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    if (self.attention_dropout_keep_prob <= 0.0
            or self.attention_dropout_keep_prob > 1.0):
        raise ValueError("Dropout keep prob for attn must be in (0,1].")

    self.train_mode = tf.placeholder(tf.bool, [], "train_mode")

    log("Output op: {}".format(self.output))
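# The self-attention sublayers referenced above build on the scaled
# dot-product attention of Vaswani et al. (2017). A minimal numpy
# sketch of that primitive for a single head (illustrative; the
# encoder's real multi-head implementation lives elsewhere):
import numpy as np

def scaled_dot_product_attention(q, k, v):
    """q, k, v: [time, d_k] arrays; returns [time, d_k]."""
    scores = q @ k.T / np.sqrt(k.shape[-1])             # [time, time]
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)      # row-wise softmax
    return weights @ v

x = np.random.rand(5, 8)  # a toy sequence of five 8-dim states
out = scaled_dot_product_attention(x, x, x)
assert out.shape == (5, 8)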
def __init__(self,
             name: str,
             parent_decoder: AutoregressiveDecoder,
             beam_size: int,
             max_steps: int,
             length_normalization: float) -> None:
    """Construct the beam search decoder graph.

    Arguments:
        name: The name for the model part.
        parent_decoder: An autoregressive decoder from which to sample.
        beam_size: The number of hypotheses in the beam.
        max_steps: The maximum number of time steps to perform.
        length_normalization: The alpha parameter from Eq. 14 in the paper.
    """
    check_argument_types()
    ModelPart.__init__(self, name)

    self.parent_decoder = parent_decoder
    self.beam_size = beam_size
    self.length_normalization = length_normalization
    self.max_steps_int = max_steps

    # Create a placeholder for the maximum number of steps. This is
    # necessary during ensembling, when the decoder is called repeatedly
    # with the max_steps attribute set to one.
    self.max_steps = tf.placeholder_with_default(max_steps, [])

    # This is an ugly hack for handling the whole graph when expanding to
    # the beam. We need to access all the inner states of the network in
    # the graph, replace them with beam-size-times copied originals,
    # create the beam search graph, and then replace the inner states
    # back.
    has_encoder = (hasattr(self.parent_decoder, "encoder_states")
                   and hasattr(self.parent_decoder, "encoder_mask"))

    if has_encoder:
        enc_states = self.parent_decoder.encoder_states
        enc_mask = self.parent_decoder.encoder_mask

    if has_encoder and enc_states is not None and enc_mask is not None:
        setattr(self.parent_decoder, "encoder_states",
                self.expand_to_beam(enc_states))
        setattr(self.parent_decoder, "encoder_mask",
                self.expand_to_beam(enc_mask))

    # Create the beam search symbolic graph.
    with self.use_scope():
        self.initial_loop_state = self.get_initial_loop_state()
        self.outputs = self.decoding_loop()

    # Reassign the original encoder states and mask back
    if has_encoder:
        setattr(self.parent_decoder, "encoder_states", enc_states)
        setattr(self.parent_decoder, "encoder_mask", enc_mask)
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new ``BaseAttention`` object."""
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.query_state_size = None  # type: tf.Tensor
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             layers: List[int],
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 0.5,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new instance of the sequence classifier.

    Args:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        layers: List defining the structure of the network. For example,
            ``layers=[100,20,5]`` in the INI file creates a classifier
            with hidden layers of sizes 100, 20 and 5, followed by an
            output layer whose size depends on the size of the
            vocabulary.
        activation_fn: Activation function used on the output of each
            hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation_fn = activation_fn
    self.dropout_keep_prob = dropout_keep_prob
    self.max_output_len = 1

    with self.use_scope():
        self.gt_inputs = [tf.placeholder(tf.int32, [None], "targets")]

        mlp_input = tf.concat([enc.output for enc in self.encoders], 1)
        self._mlp = MultilayerPerceptron(
            mlp_input, self.layers, self.dropout_keep_prob,
            len(self.vocabulary), activation_fn=self.activation_fn,
            train_mode=self.train_mode)

    tf.summary.scalar("train_optimization_cost", self.cost,
                      collections=["summary_train"])
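# A shape-only numpy sketch (illustrative, not the MultilayerPerceptron
# from the codebase) of the forward pass the layers argument describes
# for layers=[100, 20, 5]: hidden layers of those sizes, then an output
# layer of vocabulary size producing the logits. Random weights stand in
# for trained parameters.
import numpy as np

def mlp_forward(x, hidden_sizes, vocab_size):
    for size in hidden_sizes:
        w = np.random.rand(x.shape[-1], size)
        x = np.maximum(x @ w, 0.0)  # ReLU, matching activation_fn above
    w_out = np.random.rand(x.shape[-1], vocab_size)
    return x @ w_out                # raw logits, no activation

logits = mlp_forward(np.random.rand(4, 256), [100, 20, 5], 3000)
assert logits.shape == (4, 3000)    # [batch, vocabulary size]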
def __init__(self,
             name: str,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new ``BaseAttention`` object."""
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.query_state_size = None  # type: tf.Tensor
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize an instance of the pooling layer."""
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
def __init__(self,
             name: str,
             encoders: List[Any],
             vocabulary: Vocabulary,
             data_id: str,
             layers: Optional[List[int]] = None,
             activation: Callable[[tf.Tensor], tf.Tensor] = tf.tanh,
             dropout_keep_prob: float = 0.5,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation = activation
    self.dropout_keep_prob = dropout_keep_prob
    self.max_output_len = 1

    with tf.variable_scope(name):
        self.learning_step = tf.get_variable(
            "learning_step", [], trainable=False,
            initializer=tf.constant_initializer(0))

        self.dropout_placeholder = tf.placeholder(
            tf.float32, name="dropout_plc")
        self.gt_inputs = [
            tf.placeholder(tf.int32, shape=[None], name="targets")]

        # Note: updated from the legacy TF API (tf.concat with the axis
        # first, positional cross-entropy arguments, tf.scalar_summary)
        # to the TF 1.x API used in the rest of this code.
        mlp_input = tf.concat([enc.encoded for enc in encoders], 1)
        mlp = MultilayerPerceptron(
            mlp_input, layers, self.dropout_placeholder, len(vocabulary))

        self.loss_with_gt_ins = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=mlp.logits, labels=self.gt_inputs[0]))
        self.loss_with_decoded_ins = self.loss_with_gt_ins
        self.cost = self.loss_with_gt_ins

        self.decoded_seq = [mlp.classification]
        self.decoded_logits = [mlp.logits]
        self.runtime_logprobs = [tf.nn.log_softmax(mlp.logits)]

    tf.summary.scalar("val_optimization_cost", self.cost,
                      collections=["summary_val"])
    tf.summary.scalar("train_optimization_cost", self.cost,
                      collections=["summary_train"])
def __init__(self,
             encoder: RecurrentEncoder,
             decoder: Decoder,
             data_id: str,
             name: str,
             reuse: ModelPart = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, None, None, initializers)

    self.encoder = encoder
    self.decoder = decoder
    self.data_id = data_id
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.query_state_size = None  # type: tf.Tensor
    self._histories = {}  # type: Dict[str, tf.Tensor]

    with self.use_scope():
        self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Union[ConvSpec, ResNetSpec, MaxPoolSpec]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             fully_connected: List[int] = None,
             batch_normalize: bool = False,
             dropout_keep_prob: float = 0.5,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize a convolutional network for image processing.

    The convolutional network can consist of plain convolutions,
    max-pooling layers and residual blocks. In the configuration, they
    are specified using the following tuples.

    * convolution: ("C", kernel_size, stride, padding, out_channel);
    * max / average pooling: ("M"/"A", kernel_size, stride, padding);
    * residual block: ("R", kernel_size, out_channels).

    Padding must be either "valid" or "same".

    Args:
        convolutions: Configuration of convolutional layers.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the image.
        pixel_dim: Number of color channels in the input images.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout. Dropout is done between all convolutional layers
            and the fully connected layers.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob
    self.image_height = image_height
    self.image_width = image_width
    self.pixel_dim = pixel_dim
    self.convolutions = convolutions
    self.fully_connected = fully_connected
    self.batch_normalize = batch_normalize
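# An example (hypothetical) value for the convolutions argument, using
# exactly the tuple formats documented in the docstring above: two plain
# convolutions with max-pooling in between, followed by a residual
# block.
convolutions = [
    ("C", 3, 1, "same", 32),  # 3x3 convolution, stride 1, 32 channels
    ("M", 2, 2, "same"),      # 2x2 max-pooling with stride 2
    ("C", 3, 1, "same", 64),
    ("R", 3, 64),             # residual block, 3x3 kernels, 64 channels
]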
def __init__(self,
             name: str,
             max_input_len: int,
             vocabularies: List[Vocabulary],
             data_ids: List[str],
             embedding_sizes: List[int],
             rnn_size: int,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             **kwargs) -> None:
    """Construct a new instance of the factored encoder.

    Args:
        max_input_len: Maximum input length (longer sequences are trimmed)
        vocabularies: List of vocabularies indexed
        data_ids: List of data series IDs
        embedding_sizes: List of embedding sizes for each data series
        name: The name for this encoder. [sentence_encoder]
        rnn_size: The size of the hidden state

    Keyword arguments:
        use_noisy_activations: Boolean flag whether to use noisy
            activation functions in RNN cells.
            (see neuralmonkey.nn.noisy_gru_cell) [False]
        attention_type: The attention to use. [None]
        attention_fertility: Fertility for CoverageAttention (if used). [3]
        dropout_keep_prob: 1 - Dropout probability [1]
    """
    attention_type = kwargs.get("attention_type", None)
    Attentive.__init__(self, attention_type)
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    assert check_argument_types()

    self.vocabularies = vocabularies
    self.data_ids = data_ids
    self.embedding_sizes = embedding_sizes
    self.max_input_len = max_input_len
    self.rnn_size = rnn_size

    self.dropout_keep_prob = kwargs.get("dropout_keep_prob", 1)
    self.use_noisy_activations = kwargs.get("use_noisy_activations", False)

    log("Building encoder graph, name: '{}'.".format(self.name))
    with tf.variable_scope(self.name):
        self._create_encoder_graph()
    log("Encoder graph constructed.")
def __init__(self,
             name: str,
             input_shape: List[int],
             data_id: str,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    assert len(input_shape) == 3

    self.data_id = data_id
    self.input_shape = input_shape
def __init__(self,
             name: str,
             data_id: str,
             network_type: str,
             slim_models_path: str,
             load_checkpoint: str = None,
             spatial_layer: str = None,
             encoded_layer: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize pre-trained ImageNet network.

    Args:
        name: Name of the model part (the ImageNet network itself lives
            in its own scope, independent of `name`).
        data_id: Id of series with images (list of 3D numpy arrays)
        network_type: Identifier of ImageNet network from TFSlim.
        spatial_layer: String identifier of the convolutional map
            (model's endpoint). Check the TFSlim documentation for end
            point specifications.
        encoded_layer: String id of the network layer that will be used
            as input of a decoder. `None` means averaging the
            convolutional maps.
        slim_models_path: Path to Slim models in the tensorflow/models
            repository.
        load_checkpoint: Checkpoint file from which the pre-trained
            network is loaded.
    """
    check_argument_types()
    ModelPart.__init__(self, name, load_checkpoint=load_checkpoint,
                       initializers=initializers, save_checkpoint=None)

    sys.path.insert(0, slim_models_path)

    self.data_id = data_id
    self.network_type = network_type
    self.spatial_layer = spatial_layer
    self.encoded_layer = encoded_layer

    if self.network_type not in SUPPORTED_NETWORKS:
        raise ValueError(
            "Network '{}' is not among the supported ones ({})".format(
                self.network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

    self.net_specification = SUPPORTED_NETWORKS[self.network_type]()
    self.height, self.width = self.net_specification.image_size
def __init__(self,
             name: str,
             encoder: Union[RecurrentEncoder, SentenceEncoder],
             vocabulary: Vocabulary,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    self.rnn_size = int(self.encoder.temporal_states.get_shape()[-1])
def __init__(self,
             name: str,
             encoder: RecurrentEncoder,
             vocabulary: Vocabulary,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    self.rnn_size = self.encoder.rnn_size * 2
    self.max_output_len = self.encoder.input_sequence.max_length
def __init__(self,
             name: str,
             parent_decoder: Decoder,
             max_steps: int,
             beam_size: int,
             length_normalization: float,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    assert check_argument_types()

    self.parent_decoder = parent_decoder
    self._beam_size = beam_size
    self._max_steps = max_steps
    self._length_normalization = length_normalization

    self.outputs = self._decoding_loop()
def __init__(self,
             name: str,
             data_id: str,
             input_size: int,
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.data_id = data_id
    self.input_size = input_size
    self.max_input_len = max_input_len
    self.dropout_keep_prob = dropout_keep_prob
def __init__(self,
             name: str,
             max_length: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new `Sequence` object.

    Arguments:
        name: The name for the `ModelPart` object
        max_length: Maximum length of sequences in the object (not
            checked)
        save_checkpoint: The save_checkpoint parameter for `ModelPart`
        load_checkpoint: The load_checkpoint parameter for `ModelPart`
    """
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.max_length = max_length
    if self.max_length is not None and self.max_length <= 0:
        raise ValueError("Max sequence length must be a positive integer.")
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             filters: List[Tuple[int, int]],
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of the CNN sequence encoder.

    Based on: Yoon Kim: Convolutional Neural Networks for Sentence
    Classification (http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf)

    Arguments:
        vocabulary: Input vocabulary
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        max_input_len: Maximum length of an encoded sequence
        embedding_size: The size of the embedding vector assigned to
            each word
        filters: Specification of CNN filters. It is a list of tuples
            specifying the filter size and number of channels.
        dropout_keep_prob: The dropout keep probability (default 1.0)
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_input_len = max_input_len
    self.embedding_size = embedding_size
    self.dropout_keep_prob = dropout_keep_prob
    self.filters = filters
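# A hypothetical filters configuration in the (filter size, number of
# channels) format documented above, mirroring the setup of Kim (2014).
# Assuming the usual max-over-time pooling and concatenation from that
# paper, the per-filter features are concatenated, so the encoded
# sentence vector has dimension equal to the channel sum.
filters = [(3, 100), (4, 100), (5, 100)]
encoded_dim = sum(channels for _, channels in filters)
assert encoded_dim == 300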
def __init__(self,
             name: str,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             layers: List[int],
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 0.5,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new instance of the sequence classifier.

    Args:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        layers: List defining the structure of the network. For example,
            ``layers=[100,20,5]`` in the INI file creates a classifier
            with hidden layers of sizes 100, 20 and 5, followed by an
            output layer whose size depends on the size of the
            vocabulary.
        activation_fn: Activation function used on the output of each
            hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation_fn = activation_fn
    self.dropout_keep_prob = dropout_keep_prob
    self.max_output_len = 1
def __init__(self,
             name: str,
             encoders: List[Stateful],
             data_id: str,
             layers: List[int] = None,
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             dimension: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoders = encoders
    self.data_id = data_id
    self.max_output_len = 1
    self.dimension = dimension

    self._layers = layers
    self._activation_fn = activation_fn
    self._dropout_keep_prob = dropout_keep_prob
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             ff_hidden_size: int,
             depth: int,
             n_heads: int,
             dropout_keep_prob: float = 1.0,
             attention_dropout_keep_prob: float = 1.0,
             target_space_id: int = None,
             use_att_transform_bias: bool = False,
             use_positional_encoding: bool = True,
             input_for_cross_attention: Attendable = None,
             n_cross_att_heads: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create an encoder of the Transformer model.

    Described in Vaswani et al. (2017), arxiv.org/abs/1706.03762

    Arguments:
        input_sequence: Embedded input sequence.
        name: Name of the encoder. Should be unique across all Neural
            Monkey objects.
        reuse: Reuse the model variables.
        dropout_keep_prob: Probability of keeping a value during dropout.
        target_space_id: Specifies the modality of the target space.
        use_att_transform_bias: Add bias when transforming qkv vectors
            for attention.
        use_positional_encoding: If True, position encoding signal is
            added to the input.

    Keyword arguments:
        ff_hidden_size: Size of the feedforward sublayers.
        n_heads: Number of the self-attention heads.
        depth: Number of sublayers.
        attention_dropout_keep_prob: Probability of keeping a value
            during dropout on the attention output.
        input_for_cross_attention: An attendable model part that is
            attended using cross-attention on every layer of the
            encoder, analogously to how the encoder is attended in the
            decoder.
        n_cross_att_heads: Number of heads used in the cross-attention.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
    self.ff_hidden_size = ff_hidden_size
    self.depth = depth
    self.n_heads = n_heads
    self.dropout_keep_prob = dropout_keep_prob
    self.attention_dropout_keep_prob = attention_dropout_keep_prob
    self.target_space_id = target_space_id
    self.use_att_transform_bias = use_att_transform_bias
    self.use_positional_encoding = use_positional_encoding
    self.input_for_cross_attention = input_for_cross_attention
    self.n_cross_att_heads = n_cross_att_heads

    if self.depth <= 0:
        raise ValueError("Depth must be a positive integer.")

    if self.ff_hidden_size <= 0:
        raise ValueError("Feed forward hidden size must be a "
                         "positive integer.")

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    if (self.attention_dropout_keep_prob <= 0.0
            or self.attention_dropout_keep_prob > 1.0):
        raise ValueError("Dropout keep prob for attn must be in (0,1].")

    if self.target_space_id is not None and (self.target_space_id >= 32
                                             or self.target_space_id < 0):
        raise ValueError(
            "If provided, the target space ID should be between 0 and 31. "
            "Was: {}".format(self.target_space_id))

    if (input_for_cross_attention is None) != (n_cross_att_heads is None):
        raise ValueError(
            "Either both input_for_cross_attention and n_cross_att_heads "
            "must be provided or none of them.")

    self._variable_scope.set_initializer(tf.variance_scaling_initializer(
        mode="fan_avg", distribution="uniform"))
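# use_positional_encoding adds a position signal to the input. A
# runnable numpy sketch of the standard sinusoidal formulation from
# Vaswani et al. (2017) (an assumption that this encoder uses the same
# variant): PE[pos, 2i] = sin(pos / 10000^(2i/d)) and
# PE[pos, 2i+1] = cos(pos / 10000^(2i/d)).
import numpy as np

def positional_encoding(length: int, dim: int) -> np.ndarray:
    """Return a [length, dim] position signal; dim must be even."""
    positions = np.arange(length)[:, None]         # [length, 1]
    div = 10000.0 ** (np.arange(0, dim, 2) / dim)  # [dim / 2]
    signal = np.zeros((length, dim))
    signal[:, 0::2] = np.sin(positions / div)
    signal[:, 1::2] = np.cos(positions / div)
    return signal

assert positional_encoding(50, 512).shape == (50, 512)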