def __init__(self,
             name: str,
             input_sequence: Attendable,
             hidden_size: int,
             num_heads: int,
             output_size: int = None,
             state_proj_size: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize an instance of the encoder.

    Arguments:
        name: Name of the model part.
        input_sequence: The attendable input to encode.
        hidden_size: Size of the attention hidden state.
        num_heads: Number of attention heads.
        output_size: Optional size of the output projection.
        state_proj_size: Optional size of the state projection.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.input_sequence = input_sequence
    self.hidden_size = hidden_size
    self.num_heads = num_heads
    self.output_size = output_size
    self.state_proj_size = state_proj_size
    self.dropout_keep_prob = dropout_keep_prob

    # Keep probability of 0 would drop everything; above 1 is invalid.
    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")
def __init__(self,
             name: str,
             input_shape: List[int],
             data_id: str,
             projection_dim: int = None,
             ff_hidden_dim: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate SpatialFiller.

    Args:
        name: Name of the model part.
        input_shape: Dimensionality of the input.
        data_id: Name of the data series with numpy objects.
        projection_dim: Optional, dimension of the states projection.
        ff_hidden_dim: Optional hidden dimension of the feed-forward
            projection; requires ``projection_dim`` to be set.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.data_id = data_id
    self.input_shape = input_shape
    self.projection_dim = projection_dim
    self.ff_hidden_dim = ff_hidden_dim

    if self.ff_hidden_dim is not None and self.projection_dim is None:
        raise ValueError(
            "projection_dim must be provided when using ff_hidden_dim")

    if len(self.input_shape) != 3:
        raise ValueError("The input shape should have 3 dimensions.")
def __init__(self,
             name: str,
             encoders: List[Stateful],
             data_id: str,
             layers: List[int] = None,
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             dimension: int = 1,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the model part.

    Arguments:
        name: Name of the model part.
        encoders: Input encoders whose outputs are used.
        data_id: Name of the target data series.
        layers: Optional sizes of the hidden layers.
        activation_fn: Activation applied after each hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
        dimension: Dimension of the output.
    """
    # Fix: call check_argument_types() directly instead of wrapping it
    # in `assert`, which would be silently stripped under `python -O`.
    # Also run the check before any other initialization, consistently
    # with the other model parts in the code base.
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.encoders = encoders
    self.data_id = data_id
    self.max_output_len = 1
    self.dimension = dimension

    self._layers = layers
    self._activation_fn = activation_fn
    self._dropout_keep_prob = dropout_keep_prob

    tf.summary.scalar("val_optimization_cost", self.cost,
                      collections=["summary_val"])
    tf.summary.scalar("train_optimization_cost", self.cost,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             encoder: Union[RecurrentEncoder, SentenceEncoder],
             vocabulary: Vocabulary,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the labeler.

    Arguments:
        name: Name of the model part.
        encoder: Encoder whose temporal states are labeled.
        vocabulary: Target label vocabulary.
        data_id: Name of the target data series.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    # Derive the RNN size from the last dimension of the encoder states.
    self.rnn_size = int(self.encoder.temporal_states.get_shape()[-1])

    with self.use_scope():
        self.train_targets = tf.placeholder(
            tf.int32, [None, None], "labeler_targets")
        self.train_weights = tf.placeholder(
            tf.float32, [None, None], "labeler_padding_weights")
def __init__(self, name: str, cnn: CNNEncoder) -> None:
    """Wrap a CNN encoder; this part stores no checkpoints of its own."""
    check_argument_types()
    ModelPart.__init__(
        self, name, save_checkpoint=None, load_checkpoint=None)
    self._cnn = cnn
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             embedding_size: int = None,
             embeddings_source: EmbeddedSequence = None,
             tie_embeddings: bool = False,
             label_smoothing: float = None,
             supress_unk: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize parameters common for all autoregressive decoders.

    Arguments:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        max_output_len: Maximum length of an output sequence.
        reuse: Reuse the variables from the model part.
        dropout_keep_prob: Probability of keeping a value during dropout.
        embedding_size: Size of embedding vectors for target words.
        embeddings_source: Embedded sequence to take embeddings from.
        tie_embeddings: Use decoder.embedding_matrix also in place of the
            output decoding matrix.
        label_smoothing: Label smoothing parameter.
        supress_unk: If true, decoder will not produce symbols for
            unknown tokens. (Parameter name keeps its historical
            spelling for backward compatibility.)
    """
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self._embedding_size = embedding_size
    self.embeddings_source = embeddings_source
    self.label_smoothing = label_smoothing
    self.tie_embeddings = tie_embeddings
    self.supress_unk = supress_unk

    # Callables producing encoder states/masks; overridden by users.
    self.encoder_states = lambda: []  # type: Callable[[], List[tf.Tensor]]
    self.encoder_masks = lambda: []  # type: Callable[[], List[tf.Tensor]]

    # Validate the scalar hyperparameters.
    if self.max_output_len <= 0:
        raise ValueError(
            "Maximum sequence length must be a positive integer.")

    if self._embedding_size is not None and self._embedding_size <= 0:
        raise ValueError("Embedding size must be a positive integer.")

    if self.dropout_keep_prob < 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep probability must be a real number "
                         "in the interval [0,1].")
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate StatefulFiller.

    Arguments:
        name: Name of the model part.
        dimension: Dimensionality of the input.
        data_id: Series containing the numpy objects.
        output_shape: Dimension of optional state projection.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.data_id = data_id
    self.dimension = dimension
    self.output_shape = output_shape

    if self.dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if self.output_shape is not None and self.output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    with self.use_scope():
        self.vector = tf.placeholder(
            tf.float32, [None, self.dimension], "input_vector")
def __init__(self,
             name: str,
             input_sequence: EmbeddedSequence,
             conv_features: int,
             encoder_layers: int,
             kernel_width: int = 5,
             dropout_keep_prob: float = 1.0,
             attention_type: type = None,
             attention_state_size: int = None,
             attention_fertility: int = 3,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Initialize the convolutional seq2seq encoder.

    Arguments:
        name: Name of the model part.
        input_sequence: Embedded input sequence to encode.
        conv_features: Number of convolutional features per layer.
        encoder_layers: Number of convolutional layers.
        kernel_width: Width of the convolution kernel.
        dropout_keep_prob: Probability of keeping a value during dropout.
        attention_type: Type of attention mechanism to construct.
        attention_state_size: Dimension of the attention state projection.
        attention_fertility: Fertility parameter of coverage attention.
    """
    # Fix: call check_argument_types() directly instead of wrapping it
    # in `assert`, which would be silently stripped under `python -O`.
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size,
                       attention_fertility=attention_fertility)

    self.input_sequence = input_sequence
    self.encoder_layers = encoder_layers
    self.conv_features = conv_features
    self.kernel_width = kernel_width
    self.dropout_keep_prob = dropout_keep_prob

    if conv_features <= 0:
        raise ValueError("Number of features must be a positive integer.")
    if encoder_layers <= 0:
        raise ValueError(
            "Number of encoder layers must be a positive integer.")

    log("Initializing convolutional seq2seq encoder, name {}".format(
        self.name))
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Initialize the vector encoder.

    Arguments:
        name: Name of the model part.
        dimension: Dimensionality of the input vector.
        data_id: Series containing the input vectors.
        output_shape: Dimension of an optional linear projection.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    # Fix: corrected "postive" -> "positive" in both error messages.
    if dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if output_shape is not None and output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    self.vector = tf.placeholder(
        tf.float32, shape=[None, dimension])
    self.data_id = data_id

    with self.use_scope():
        if output_shape is not None and dimension != output_shape:
            # Project the input vector to the requested output dimension.
            project_w = tf.get_variable(
                shape=[dimension, output_shape],
                name="img_init_proj_W")
            project_b = tf.get_variable(
                name="img_init_b", shape=[output_shape],
                initializer=tf.zeros_initializer())

            self.encoded = tf.matmul(self.vector, project_w) + project_b
        else:
            self.encoded = self.vector
def __init__(self,
             name: str,
             parent_decoder: AutoregressiveDecoder,
             beam_size: int,
             length_normalization: float,
             max_steps: int = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the beam search decoder.

    Arguments:
        name: Name of the model part.
        parent_decoder: Autoregressive decoder to sample from.
        beam_size: Number of hypotheses kept in the beam.
        length_normalization: Length normalization coefficient.
        max_steps: Maximum number of decoding steps; defaults to the
            parent decoder's maximum output length minus one.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.parent_decoder = parent_decoder
    self._beam_size = beam_size
    self._length_normalization = length_normalization

    # The parent_decoder is one step ahead. This is required for
    # ensembling support.
    # At the end of the Nth step we generate logits for ensembling in the
    # N+1th step by the parent_decoder. These need to be first ensembled
    # outside of the session.run before finishing the N+1th step of the
    # beam_search_decoder (collecting topk outputs, selecting beams and
    # running the next parent_decoder step based on the chosen beam).
    if max_steps is None:
        max_steps = parent_decoder.max_output_len - 1

    self._max_steps = tf.constant(max_steps)
    self.max_output_len = max_steps

    # Feedables
    self._search_state = None  # type: Optional[SearchState]
    self._decoder_state = None  # type: Optional[NamedTuple]

    # Output
    self.outputs = self._decoding_loop()
def __init__(self,
             name: str,
             encoders: List[TemporalStateful],
             vocabulary: Vocabulary,
             data_id: str,
             max_output_len: int = None,
             hidden_dim: int = None,
             activation: Callable = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             add_start_symbol: bool = False,
             add_end_symbol: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the model part.

    Arguments:
        name: Name of the model part.
        encoders: Temporal-stateful encoders providing the input states.
        vocabulary: Target vocabulary.
        data_id: Name of the target data series.
        max_output_len: Optional maximum output length.
        hidden_dim: Optional hidden layer dimension.
        activation: Activation function for the hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
        add_start_symbol: Whether to prepend a start symbol.
        add_end_symbol: Whether to append an end symbol.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_output_len = max_output_len
    self.hidden_dim = hidden_dim
    self.activation = activation
    self.dropout_keep_prob = dropout_keep_prob
    self.add_start_symbol = add_start_symbol
    self.add_end_symbol = add_end_symbol
def __init__(self,
             name: str,
             input_sequence: EmbeddedSequence,
             conv_features: int,
             encoder_layers: int,
             kernel_width: int = 5,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the convolutional seq2seq encoder.

    Arguments:
        name: Name of the model part.
        input_sequence: Embedded input sequence to encode.
        conv_features: Number of convolutional features per layer.
        encoder_layers: Number of convolutional layers.
        kernel_width: Width of the convolution kernel.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
    self.encoder_layers = encoder_layers
    self.conv_features = conv_features
    self.kernel_width = kernel_width
    self.dropout_keep_prob = dropout_keep_prob

    if conv_features <= 0:
        raise ValueError("Number of features must be a positive integer.")
    if encoder_layers <= 0:
        raise ValueError(
            "Number of encoder layers must be a positive integer.")

    # Positional embeddings require a fixed maximum sequence length.
    if self.input_sequence.max_length is None:
        raise ValueError("Input sequence must have a maximum length for "
                         "positional embeddings with this encoder")

    self.max_input_length = self.input_sequence.max_length

    log("Initializing convolutional seq2seq encoder, name {}".format(
        self.name))
def __init__(self,
             name: str,
             input_shape: List[int],
             data_id: str,
             projection_dim: int = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate SpatialFiller.

    Args:
        name: Name of the model part.
        input_shape: Dimensionality of the input.
        data_id: Name of the data series with numpy objects.
        projection_dim: Optional, dimension of the states projection.
    """
    check_argument_types()
    ModelPart.__init__(
        self, name, save_checkpoint, load_checkpoint, initializers)

    self.data_id = data_id
    self.input_shape = input_shape
    self.projection_dim = projection_dim

    if len(self.input_shape) != 3:
        raise ValueError("The input shape should have 3 dimensions.")

    # Prepend the batch dimension to the input feature shape.
    features_shape = [None] + self.input_shape  # type: ignore

    with self.use_scope():
        self.spatial_input = tf.placeholder(
            tf.float32, shape=features_shape, name="spatial_states")
def __init__(self,
             encoder: RecurrentEncoder,
             decoder: Decoder,
             data_id: str,
             name: str) -> None:
    """Initialize the word alignment decoder.

    Arguments:
        encoder: Encoder whose input sequence the alignment refers to.
        decoder: Decoder whose outputs are aligned.
        data_id: Name of the data series with reference alignments.
        name: Name of the model part.
    """
    ModelPart.__init__(self, name, None, None)

    self.encoder = encoder
    self.decoder = decoder
    self.data_id = data_id

    self.ref_alignment = tf.placeholder(
        dtype=tf.float32,
        shape=[None,
               self.decoder.max_output_len,
               self.encoder.input_sequence.max_length],
        name="ref_alignment")

    # shape will be [max_output_len, batch_size, max_input_len]
    self.alignment_target = tf.transpose(
        self.ref_alignment, perm=[1, 0, 2])

    _, self.train_loss = self._make_decoder(runtime_mode=False)
    self.decoded, self.runtime_loss = self._make_decoder(
        runtime_mode=True)

    tf.summary.scalar("alignment_train_xent", self.train_loss,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             parent_decoder: AutoregressiveDecoder,
             beam_size: int,
             max_steps: int,
             length_normalization: float) -> None:
    """Construct the beam search decoder graph.

    Arguments:
        name: The name for the model part.
        parent_decoder: An autoregressive decoder from which to sample.
        beam_size: The number of hypotheses in the beam.
        max_steps: The maximum number of time steps to perform.
        length_normalization: The alpha parameter from Eq. 14 in the
            paper.
    """
    check_argument_types()
    ModelPart.__init__(self, name)

    self.parent_decoder = parent_decoder
    self.beam_size = beam_size
    self.length_normalization = length_normalization
    self.max_steps_int = max_steps

    # Create a placeholder for maximum number of steps that is necessary
    # during ensembling, when the decoder is called repetitively with the
    # max_steps attribute set to one.
    self.max_steps = tf.placeholder_with_default(self.max_steps_int, [])

    self._initial_loop_state = None  # type: Optional[BeamSearchLoopState]
def __init__(self, name: str, cnn: CNNEncoder) -> None:
    """Wrap a CNN encoder; this part stores no checkpoints of its own."""
    check_argument_types()
    ModelPart.__init__(self, name,
                       save_checkpoint=None, load_checkpoint=None)
    self._cnn = cnn
def __init__(self,
             name: str,
             encoder: SentenceEncoder,
             vocabulary: Vocabulary,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize the labeler.

    Arguments:
        name: Name of the model part.
        encoder: Sentence encoder whose states are labeled.
        vocabulary: Target label vocabulary.
        data_id: Name of the target data series.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    # Bidirectional encoder -> states are twice the RNN size.
    self.rnn_size = self.encoder.rnn_size * 2
    self.max_output_len = self.encoder.max_input_len

    self.train_targets = tf.placeholder(
        tf.int32, shape=[None, None], name="labeler_targets")
    self.train_weights = tf.placeholder(
        tf.float32, shape=[None, None], name="labeler_padding_weights")
    self.train_mode = tf.placeholder(tf.bool, name="labeler_train_mode")
def __init__(self,
             name: str,
             encoder: TemporalStateful,
             vocabulary: Vocabulary,
             data_id: str,
             max_length: int = None,
             merge_repeated_targets: bool = False,
             merge_repeated_outputs: bool = True,
             beam_width: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the CTC decoder.

    Arguments:
        name: Name of the model part.
        encoder: Encoder providing the temporal states to decode.
        vocabulary: Target vocabulary.
        data_id: Name of the target data series.
        max_length: Optional maximum length of the output.
        merge_repeated_targets: Merge repeated labels in the targets.
        merge_repeated_outputs: Merge repeated labels in the outputs.
        beam_width: Width of the decoding beam.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_length = max_length
    self.merge_repeated_targets = merge_repeated_targets
    self.merge_repeated_outputs = merge_repeated_outputs
    self.beam_width = beam_width
def __init__(self,
             name: str,
             encoder: TemporalStateful,
             vocabulary: Vocabulary,
             data_id: str,
             max_length: int = None,
             merge_repeated_targets: bool = False,
             merge_repeated_outputs: bool = True,
             beam_width: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the CTC decoder.

    Arguments:
        name: Name of the model part.
        encoder: Encoder providing the temporal states to decode.
        vocabulary: Target vocabulary.
        data_id: Name of the target data series.
        max_length: Optional maximum length of the output.
        merge_repeated_targets: Merge repeated labels in the targets.
        merge_repeated_outputs: Merge repeated labels in the outputs.
        beam_width: Width of the decoding beam.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_length = max_length
    self.merge_repeated_targets = merge_repeated_targets
    self.merge_repeated_outputs = merge_repeated_outputs
    self.beam_width = beam_width

    log("CTC output tensor {}.".format(self.decoded))
def __init__(self,
             name: str,
             encoders: List[Any],
             attention_type: Type,
             attention_state_size: int,
             use_sentinels=False,
             share_attn_projections=False) -> None:
    """Initialize the encoder wrapper.

    Args:
        name: Name of the encoder / its scope.
        encoders: List of encoders to be wrapped.
        attention_type: Type of the attention combination.
        attention_state_size: Dimension of the state projection of
            attention energy computation.
        use_sentinels: Flag whether the sentinel mechanism should be
            added to the attention combination.
        share_attn_projections: Flag whether the hidden state projection
            should be shared for both the energies computation and the
            context vector computation.
    """
    ModelPart.__init__(self, name, None, None)
    Attentive.__init__(self, attention_type)

    self.encoders = encoders
    self._attention_type = attention_type
    self._attention_state_size = attention_state_size
    self._use_sentinels = use_sentinels
    self._share_attn_projections = share_attn_projections

    # Concatenate the wrapped encoders' outputs along the feature axis.
    self.encoded = tf.concat([e.encoded for e in encoders], 1)
def __init__(self,
             name: str,
             parent_decoder: Decoder,
             beam_size: int,
             length_normalization: float,
             max_steps: int = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Initialize the beam search decoder.

    Arguments:
        name: Name of the model part.
        parent_decoder: Decoder to sample hypotheses from.
        beam_size: Number of hypotheses kept in the beam.
        length_normalization: Length normalization coefficient.
        max_steps: Maximum number of decoding steps; defaults to the
            parent decoder's maximum output length.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    self.parent_decoder = parent_decoder
    self._beam_size = beam_size
    self._length_normalization = length_normalization

    # In the n+1th step, outputs of length n will be collected and the
    # n+1th step of the decoder (which is discarded) will be executed.
    if max_steps is None:
        max_steps = parent_decoder.max_output_len
    self._max_steps = tf.constant(max_steps + 1)
    self.max_output_len = max_steps

    # Feedables
    self._search_state = None  # type: SearchState
    self._decoder_state = None  # type: NamedTuple

    # Output
    self.outputs = self._decoding_loop()
def __init__(self,
             encoder: RecurrentEncoder,
             decoder: Decoder,
             data_id: str,
             name: str,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the word alignment decoder.

    Arguments:
        encoder: Encoder whose input sequence the alignment refers to.
        decoder: Decoder whose outputs are aligned.
        data_id: Name of the data series with reference alignments.
        name: Name of the model part.
    """
    ModelPart.__init__(self, name, None, None, initializers)

    self.encoder = encoder
    self.decoder = decoder
    self.data_id = data_id

    if not isinstance(self.encoder.input_sequence, Sequence):
        raise TypeError("Expected Sequence type in encoder.input_sequence")

    self.enc_input = cast(Sequence, self.encoder.input_sequence)

    # TODO this is here to call the lazy properties which create
    # the list of attention distributions
    # pylint: disable=pointless-statement
    self.decoder.runtime_logits
    self.decoder.train_logits
    # pylint: enable=pointless-statement

    _, self.train_loss = self._make_decoder(runtime_mode=False)
    self.decoded, self.runtime_loss = self._make_decoder(
        runtime_mode=True)

    tf.summary.scalar("alignment_train_xent", self.train_loss,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             parent: TemporalStateful,
             factor: int,
             projection_size: int = None,
             projection_activation: Activation = None) -> None:
    """Initialize SentenceSplitter.

    Args:
        parent: TemporalStateful whose states will be split.
        factor: Factor by which the states will be split - the
            resulting sequence will be longer by this factor.
        projection_size: If not None, specifies dimensionality of a
            projection before state splitting.
        projection_activation: Non-linearity function for the optional
            projection.
    """
    check_argument_types()
    ModelPart.__init__(self, name=name, save_checkpoint=None,
                       load_checkpoint=None, initializers=None)

    self.parent = parent
    self.factor = factor
    self.projection_size = projection_size
    self.activation = projection_activation

    # A projection can only be split evenly if its size is divisible.
    if projection_size is not None and projection_size % factor != 0:
        raise ValueError(("Dimension of projection ({}) must be "
                          "dividable by the given factor ({}).").format(
                              projection_size, factor))
def __init__(self,
             name: str,
             encoder: TemporalStateful,
             vocabulary: Vocabulary,
             data_id: str,
             decode_layer_index: int = 4,
             input_sequence: EmbeddedSequence = None,
             max_length: int = None,
             merge_repeated_outputs: bool = True,
             beam_width: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the decoder.

    Arguments:
        name: Name of the model part.
        encoder: Encoder providing the temporal states to decode.
        vocabulary: Target vocabulary.
        data_id: Name of the target data series.
        decode_layer_index: Index of the encoder layer to decode from.
        input_sequence: Optional embedded input sequence.
        max_length: Optional maximum length of the output.
        merge_repeated_outputs: Merge repeated labels in the outputs.
        beam_width: Width of the decoding beam.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoder = encoder
    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_length = max_length
    self.decode_layer_index = decode_layer_index
    self.merge_repeated_outputs = merge_repeated_outputs
    self.beam_width = beam_width
    self.input_sequence = input_sequence
def __init__(self,
             name: str,
             input_shape: List[int],
             data_id: str,
             projection_dim: int = None,
             ff_hidden_dim: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate SpatialFiller.

    Args:
        name: Name of the model part.
        input_shape: Dimensionality of the input.
        data_id: Name of the data series with numpy objects.
        projection_dim: Optional, dimension of the states projection.
        ff_hidden_dim: Optional hidden dimension of the feed-forward
            projection; requires ``projection_dim`` to be set.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.data_id = data_id
    self.input_shape = input_shape
    self.projection_dim = projection_dim
    self.ff_hidden_dim = ff_hidden_dim

    if self.ff_hidden_dim is not None and self.projection_dim is None:
        raise ValueError(
            "projection_dim must be provided when using ff_hidden_dim")

    if len(self.input_shape) != 3:
        raise ValueError("The input shape should have 3 dimensions.")
def __init__(self,
             name: str,
             data_id: str,
             input_size: int,
             rnn_layers: List[RNNSpecTuple],
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of the encoder.

    Arguments:
        data_id: Identifier of the data series fed to this encoder
        name: An unique identifier for this encoder
        input_size: Dimensionality of a single input feature vector.
        rnn_layers: A list of tuples specifying the size and, optionally,
            the direction ('forward', 'backward' or 'bidirectional')
            and cell type ('GRU' or 'LSTM') of each RNN layer.
        max_input_len: Optional maximum input length.
        dropout_keep_prob: The dropout keep probability (default 1.0)
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.data_id = data_id
    # Normalize the user-supplied layer tuples into full RNN specs.
    self._rnn_layers = [_make_rnn_spec(*r) for r in rnn_layers]
    self.max_input_len = max_input_len
    self.input_size = input_size
    self.dropout_keep_prob = dropout_keep_prob
def __init__(self,
             name: str,
             input_sequence: Attendable,
             hidden_size: int,
             num_heads: int,
             output_size: int = None,
             state_proj_size: int = None,
             dropout_keep_prob: float = 1.0,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize an instance of the encoder.

    Arguments:
        name: Name of the model part.
        input_sequence: The attendable input to encode.
        hidden_size: Size of the attention hidden state.
        num_heads: Number of attention heads.
        output_size: Optional size of the output projection.
        state_proj_size: Optional size of the state projection.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    self.input_sequence = input_sequence
    self.hidden_size = hidden_size
    self.num_heads = num_heads
    self.output_size = output_size
    self.state_proj_size = state_proj_size
    self.dropout_keep_prob = dropout_keep_prob

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    with self.use_scope():
        # Apply dropout to the attention states at training time.
        self._attention_states_dropped = dropout(
            get_attention_states(self.input_sequence),
            self.dropout_keep_prob,
            self.train_mode)
def __init__(self,
             name: str,
             input_sequence: Sequence,
             rnn_size: int,
             dropout_keep_prob: float = 1.0,
             rnn_cell: str = "GRU",
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new instance of a recurrent encoder.

    Arguments:
        name: Name of the model part.
        input_sequence: Input sequence to encode.
        rnn_size: Dimension of the RNN hidden state vector.
        dropout_keep_prob: Probability of keeping a value during dropout.
        rnn_cell: Memory cell type; one of the keys of RNN_CELL_TYPES.
    """
    # Validate argument types before any other initialization,
    # consistently with the other model parts.
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    TemporalStatefulWithOutput.__init__(self)

    self.input_sequence = input_sequence
    self.rnn_size = rnn_size
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_cell_str = rnn_cell

    if self.rnn_size <= 0:
        raise ValueError("RNN size must be a positive integer.")

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    # Fix: corrected the ungrammatical error message
    # ("must be a either" -> "must be either").
    if self.rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be either 'GRU' or 'LSTM'")
def __init__(self,
             name: str,
             dimension: int,
             data_id: str,
             output_shape: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Instantiate StatefulFiller.

    Arguments:
        name: Name of the model part.
        dimension: Dimensionality of the input.
        data_id: Series containing the numpy objects.
        output_shape: Dimension of optional state projection.
    """
    check_argument_types()
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.data_id = data_id
    self.dimension = dimension
    self.output_shape = output_shape

    if self.dimension <= 0:
        raise ValueError("Input vector dimension must be positive.")
    if self.output_shape is not None and self.output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")
def __init__(self,
             name: str,
             input_shape: List[int],
             output_shape: int,
             data_id: str,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the image feature encoder.

    Arguments:
        name: Name of the model part.
        input_shape: Shape of the input features (3 dimensions,
            excluding the batch dimension).
        output_shape: Dimension of the output projection.
        data_id: Name of the data series with the image features.
    """
    check_argument_types()
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint,
                       initializers)

    assert len(input_shape) == 3
    # Fix: corrected "postive" -> "positive" in the error message.
    if output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    self.data_id = data_id

    with self.use_scope():
        features_shape = [None] + input_shape  # type: ignore
        self.image_features = tf.placeholder(tf.float32,
                                             shape=features_shape,
                                             name="image_input")

        # Spatially average the features to a single vector per image.
        self.flat = tf.reduce_mean(self.image_features,
                                   axis=[1, 2],
                                   name="average_image")

        self.project_w = get_variable(
            name="img_init_proj_W",
            shape=[input_shape[2], output_shape],
            initializer=tf.glorot_normal_initializer())
        self.project_b = get_variable(
            name="img_init_b", shape=[output_shape],
            initializer=tf.zeros_initializer())
def __init__(self,
             name: str,
             parent: TemporalStateful,
             factor: int,
             projection_size: int = None,
             projection_activation: Activation = None) -> None:
    """Initialize SentenceSplitter.

    Args:
        parent: TemporalStateful whose states will be split.
        factor: Factor by which the states will be split - the
            resulting sequence will be longer by this factor.
        projection_size: If not None, specifies dimensionality of a
            projection before state splitting.
        projection_activation: Non-linearity function for the optional
            projection.
    """
    check_argument_types()
    ModelPart.__init__(self, name=name, save_checkpoint=None,
                       load_checkpoint=None, initializers=None)

    self.parent = parent
    self.factor = factor
    self.projection_size = projection_size
    self.activation = projection_activation

    # A projection can only be split evenly if its size is divisible.
    if projection_size is not None and projection_size % factor != 0:
        raise ValueError((
            "Dimension of projection ({}) must be "
            "dividable by the given factor ({}).").format(
                projection_size, factor))
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new attention object."""
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    # Set by subclasses once the query state size is known.
    self.query_state_size = None  # type: tf.Tensor
    # Attention histories, keyed by decoder name.
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             rnn_layers: List[RNNSpecTuple],
             add_residual: bool = False,
             add_layer_norm: bool = False,
             include_final_layer_norm: bool = True,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of a recurrent encoder.

    Arguments:
        name: ModelPart name.
        input_sequence: The input sequence for the encoder.
        rnn_layers: A list of tuples specifying the size and, optionally,
            the direction and cell type of each RNN layer. Note that
            a "bidirectional" layer doubles the resulting vector
            dimension as well as the number of encoder parameters.
        add_residual: Add residual connections to the RNN layer output.
        add_layer_norm: Add layer normalization after each RNN layer.
        include_final_layer_norm: Normalize also output of the network.
        dropout_keep_prob: 1 - dropout probability.
        save_checkpoint: ModelPart save checkpoint file.
        load_checkpoint: ModelPart load checkpoint file.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)
    TemporalStatefulWithOutput.__init__(self)

    self.input_sequence = input_sequence
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_specs = [_make_rnn_spec(*r) for r in rnn_layers]
    self.add_residual = add_residual
    self.add_layer_norm = add_layer_norm
    self.include_final_layer_norm = include_final_layer_norm

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    # Bidirectional layers produce states of twice the configured size.
    layer_sizes = [
        2 * layer.size if layer.direction == "bidirectional"
        else layer.size
        for layer in self.rnn_specs]

    # Fix: corrected "connectiong" -> "connections" in the message.
    if add_residual and len(set(layer_sizes)) > 1:
        raise ValueError(
            "When using residual connections, all layers must have "
            "the same size, but are {}.".format(layer_sizes))

    self._variable_scope.set_initializer(
        tf.random_normal_initializer(stddev=0.001))
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             rnn_layers: List[RNNSpecTuple],
             add_residual: bool = False,
             add_layer_norm: bool = False,
             include_final_layer_norm: bool = True,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of a recurrent encoder.

    Arguments:
        name: ModelPart name.
        input_sequence: The input sequence for the encoder.
        rnn_layers: A list of tuples specifying the size and, optionally,
            the direction and cell type of each RNN layer. Note that
            a "bidirectional" layer doubles the resulting vector
            dimension as well as the number of encoder parameters.
        add_residual: Add residual connections to the RNN layer output.
        add_layer_norm: Add layer normalization after each RNN layer.
        include_final_layer_norm: Normalize also output of the network.
        dropout_keep_prob: 1 - dropout probability.
        save_checkpoint: ModelPart save checkpoint file.
        load_checkpoint: ModelPart load checkpoint file.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)
    TemporalStatefulWithOutput.__init__(self)

    self.input_sequence = input_sequence
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_specs = [_make_rnn_spec(*r) for r in rnn_layers]
    self.add_residual = add_residual
    self.add_layer_norm = add_layer_norm
    self.include_final_layer_norm = include_final_layer_norm

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    # Bidirectional layers produce states of twice the configured size.
    layer_sizes = [
        2 * layer.size if layer.direction == "bidirectional"
        else layer.size
        for layer in self.rnn_specs]

    # Fix: corrected "connectiong" -> "connections" in the message.
    if add_residual and len(set(layer_sizes)) > 1:
        raise ValueError(
            "When using residual connections, all layers must have "
            "the same size, but are {}.".format(layer_sizes))

    self._variable_scope.set_initializer(
        tf.random_normal_initializer(stddev=0.001))
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new attention object."""
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)

    # Set by subclasses once the query state size is known.
    self.query_state_size = None  # type: tf.Tensor
    # Attention histories, keyed by decoder name.
    self._histories = {}  # type: Dict[str, tf.Tensor]

    self.train_mode = tf.placeholder(tf.bool, [], "train_mode")
def __init__(self,
             name: str,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new ``BaseAttention`` object."""
    ModelPart.__init__(
        self, name, save_checkpoint, load_checkpoint, initializers)

    # Dimension of the attention query vector; filled in by subclasses.
    self.query_state_size = None  # type: tf.Tensor
    # Attention history tensors keyed by the querying decoder's name.
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             layers: List[int],
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 0.5,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new instance of the sequence classifier.

    Args:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        layers: List defining the structure of the NN. Ini example:
            layers=[100,20,5]; creates a classifier with hidden layers
            of size 100, 20, 5 and one output layer depending on the
            size of the vocabulary.
        activation_fn: Activation function used on the output of each
            hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation_fn = activation_fn
    self.dropout_keep_prob = dropout_keep_prob
    # A classifier predicts exactly one label per example.
    self.max_output_len = 1

    with self.use_scope():
        # Placeholder for the gold class indices, one per example.
        self.gt_inputs = [tf.placeholder(tf.int32, [None], "targets")]

        # Concatenate the fixed-size encoder outputs along the feature
        # dimension to form the MLP input.
        mlp_input = tf.concat([enc.output for enc in self.encoders], 1)
        self._mlp = MultilayerPerceptron(
            mlp_input, self.layers, self.dropout_keep_prob,
            len(self.vocabulary),
            activation_fn=self.activation_fn,
            train_mode=self.train_mode)

    tf.summary.scalar("train_optimization_cost", self.cost,
                      collections=["summary_train"])
def __init__(self,
             name: str,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new ``BaseAttention`` object."""
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    # Dimension of the attention query vector; filled in by subclasses.
    self.query_state_size = None  # type: tf.Tensor
    # Attention history tensors keyed by the querying decoder's name.
    self._histories = {}  # type: Dict[str, tf.Tensor]
def __init__(self,
             name: str,
             input_sequence: TemporalStateful,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize an instance of the pooling layer.

    Arguments:
        name: ModelPart name.
        input_sequence: The temporal sequence to pool over.
    """
    check_argument_types()
    ModelPart.__init__(
        self, name, reuse, save_checkpoint, load_checkpoint, initializers)

    self.input_sequence = input_sequence
def __init__(self, name: str, data_id: str, convolutions: List[Union[ConvSpec, ResNetSpec, MaxPoolSpec]], image_height: int, image_width: int, pixel_dim: int, fully_connected: List[int] = None, batch_normalize: bool = False, dropout_keep_prob: float = 0.5, reuse: ModelPart = None, save_checkpoint: str = None, load_checkpoint: str = None, initializers: InitializerSpecs = None) -> None: """Initialize a convolutional network for image processing. The convolutional network can consist of plain convolutions, max-pooling layers and residual block. In the configuration, they are specified using the following tuples. * convolution: ("C", kernel_size, stride, padding, out_channel); * max / average pooling: ("M"/"A", kernel_size, stride, padding); * residual block: ("R", kernel_size, out_channels). Padding must be either "valid" or "same". Args: convolutions: Configuration of convolutional layers. data_id: Identifier of the data series in the dataset. image_height: Height of the input image in pixels. image_width: Width of the image. pixel_dim: Number of color channels in the input images. dropout_keep_prob: Probability of keeping neurons active in dropout. Dropout is done between all convolutional layers and fully connected layer. """ check_argument_types() ModelPart.__init__( self, name, reuse, save_checkpoint, load_checkpoint, initializers) self.data_id = data_id self.dropout_keep_prob = dropout_keep_prob self.image_height = image_height self.image_width = image_width self.pixel_dim = pixel_dim self.convolutions = convolutions self.fully_connected = fully_connected self.batch_normalize = batch_normalize
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the image placeholder with normalized pixel data."""
    fd = ModelPart.feed_dict(self, dataset, train)

    # Data coming from a pickled file may be a plain list instead of a
    # numpy tensor, so coerce it defensively before scaling.
    batch = np.array(list(dataset.get_series(self.data_id)))

    # Map 0-255 pixel intensities into the [0, 1] interval.
    fd[self.image_input] = batch / 255.0
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the train-input placeholder when the series is available."""
    fd = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is not None:
        # Transpose the batch and keep only the first item of each
        # example.
        fd[self.train_inputs] = list(zip(*list(series)))[0]
    return fd
def __init__(self, name: str, data_id: str, input_size: int, max_input_len: int = None, dropout_keep_prob: float = 1.0, reuse: ModelPart = None, save_checkpoint: str = None, load_checkpoint: str = None, initializers: InitializerSpecs = None) -> None: check_argument_types() ModelPart.__init__( self, name, reuse, save_checkpoint, load_checkpoint, initializers) self.data_id = data_id self.input_size = input_size self.max_input_len = max_input_len self.dropout_keep_prob = dropout_keep_prob
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target-label placeholder when labels are available."""
    fd = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is not None:
        padded = pad_batch(list(series), self.max_output_len)
        # Each example carries a single label, so take the first
        # (and only) token of every padded row.
        fd[self.targets] = [row[0] for row in padded]
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target-token placeholder when the series is present."""
    fd = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is not None:
        padded = pad_batch(
            list(series), self.max_output_len,
            self.add_start_symbol, self.add_end_symbol)
        fd[self.target_tokens] = padded
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary with the encoder inputs.

    Arguments:
        dataset: The dataset to use.
        train: Boolean flag telling whether it is training time.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    tokens = list(dataset.get_series(self.data_id))
    fd[self.input_tokens] = pad_batch(tokens, self.max_input_len)
    return fd
def __init__(self,
             name: str,
             max_length: int = None,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new `Sequence` object.

    Arguments:
        name: The name for the `ModelPart` object.
        max_length: Maximum length of sequences in the object
            (not checked).
        save_checkpoint: The save_checkpoint parameter for `ModelPart`.
        load_checkpoint: The load_checkpoint parameter for `ModelPart`.
    """
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.max_length = max_length
    # A given maximum length must be strictly positive.
    if not (self.max_length is None or self.max_length > 0):
        raise ValueError("Max sequence length must be a positive integer.")
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the target tokens; require them during training."""
    fd = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        if train:
            raise ValueError("You must feed reference sentences when training")
    else:
        fd[self.target_tokens] = pad_batch(list(series), self.max_length)
    return fd
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             filters: List[Tuple[int, int]],
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Create a new instance of the CNN sequence encoder.

    Based on: Yoon Kim: Convolutional Neural Networks for Sentence
    Classification (http://emnlp2014.org/papers/pdf/EMNLP2014181.pdf)

    Arguments:
        vocabulary: Input vocabulary.
        data_id: Identifier of the data series fed to this encoder.
        name: A unique identifier for this encoder.
        max_input_len: Maximum length of an encoded sequence.
        embedding_size: The size of the embedding vector assigned to
            each word.
        filters: Specification of CNN filters. It is a list of tuples
            specifying the filter size and number of channels.
        dropout_keep_prob: The dropout keep probability (default 1.0).
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    # Input configuration.
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_input_len = max_input_len

    # Architecture configuration.
    self.embedding_size = embedding_size
    self.filters = filters
    self.dropout_keep_prob = dropout_keep_prob
def __init__(self,
             name: str,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             layers: List[int],
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 0.5,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Construct a new instance of the sequence classifier.

    Args:
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects.
        encoders: Input encoders of the decoder.
        vocabulary: Target vocabulary.
        data_id: Target data series.
        layers: List defining the structure of the NN. Ini example:
            layers=[100,20,5]; creates a classifier with hidden layers
            of size 100, 20, 5 and one output layer depending on the
            size of the vocabulary.
        activation_fn: Activation function used on the output of each
            hidden layer.
        dropout_keep_prob: Probability of keeping a value during dropout.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.layers = layers
    self.activation_fn = activation_fn
    self.dropout_keep_prob = dropout_keep_prob
    # A classifier predicts exactly one label per example.
    self.max_output_len = 1
def __init__(self,
             name: str,
             encoders: List[Stateful],
             data_id: str,
             layers: List[int] = None,
             activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.relu,
             dropout_keep_prob: float = 1.0,
             dimension: int = 1,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    """Initialize the regressor over the encoder states.

    Arguments:
        name: ModelPart name.
        encoders: Input encoders whose outputs are regressed over.
        data_id: Target data series.
        layers: Optional sizes of hidden MLP layers.
        activation_fn: Activation applied after each hidden layer.
        dropout_keep_prob: 1 - dropout probability.
        dimension: Dimension of the regressed value.
    """
    check_argument_types()
    ModelPart.__init__(self, name, reuse, save_checkpoint,
                       load_checkpoint, initializers)

    self.encoders = encoders
    self.data_id = data_id
    self.dimension = dimension
    # A regressor produces exactly one output per example.
    self.max_output_len = 1

    self._layers = layers
    self._activation_fn = activation_fn
    self._dropout_keep_prob = dropout_keep_prob
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the placeholders with the data.

    Arguments:
        dataset: The dataset.
        train: A flag whether the train mode is enabled.

    Returns:
        The constructed feed dictionary that contains the factor data
        and the mask.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    # Feed every factor placeholder from its corresponding data series.
    for placeholder, series_id in zip(self.input_factors, self.data_ids):
        factor_data = list(dataset.get_series(series_id))
        fd[placeholder] = pad_batch(
            factor_data, self.max_length,
            self.add_start_symbol, self.add_end_symbol)

    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Populate the feed dictionary for the decoder object.

    Arguments:
        dataset: The dataset to use for the decoder.
        train: Boolean flag, telling whether this is a training run.
    """
    fd = ModelPart.feed_dict(self, dataset, train)

    series = dataset.maybe_get_series(self.data_id)
    if series is None:
        if train:
            raise ValueError("When training, you must feed "
                             "reference sentences")
    else:
        # Append the end symbol so the decoder learns to terminate;
        # the start symbol is supplied by the decoder itself.
        fd[self.train_tokens] = pad_batch(
            list(series), self.max_output_len,
            add_start_symbol=False, add_end_symbol=True)

    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Pad the numpy state sequences of the batch to a common length."""
    fd = ModelPart.feed_dict(self, dataset, train)

    tensors = list(dataset.get_series(self.data_id))

    # Target length is the longest sequence in the batch, optionally
    # capped by ``max_input_len``.
    target_len = max(t.shape[0] for t in tensors)
    if self.max_input_len is not None:
        target_len = min(self.max_input_len, target_len)

    # True (possibly truncated) length of every example.
    lengths = [min(target_len, t.shape[0]) for t in tensors]

    padded = []
    for tensor, length in zip(tensors, lengths):
        buf = np.zeros(
            shape=(target_len,) + tensor.shape[1:], dtype=tensor.dtype)
        buf[:length] = tensor[:length]
        padded.append(buf)

    fd[self.temporal_states] = padded
    fd[self._input_lengths] = lengths
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the spatial-input placeholder from the data series."""
    fd = ModelPart.feed_dict(self, dataset, train)
    series = dataset.get_series(self.data_id)
    fd[self.spatial_input] = list(series)
    return fd
def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
    """Feed the vector placeholder from the data series."""
    fd = ModelPart.feed_dict(self, dataset, train)
    series = dataset.get_series(self.data_id)
    fd[self.vector] = series
    return fd
def feed_dict(self, dataset: Dataset, train: bool = True) -> FeedDict:
    """Return the generic ModelPart feed dictionary unchanged."""
    fd = ModelPart.feed_dict(self, dataset, train)
    return fd