def __init__(self,
             name: str,
             input_sequence: EmbeddedSequence,
             conv_features: int,
             encoder_layers: int,
             kernel_width: int = 5,
             dropout_keep_prob: float = 1.0,
             attention_type: type = None,
             attention_state_size: int = None,
             attention_fertility: int = 3,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    assert check_argument_types()

    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size,
                       attention_fertility=attention_fertility)

    self.input_sequence = input_sequence
    self.encoder_layers = encoder_layers
    self.conv_features = conv_features
    self.kernel_width = kernel_width
    self.dropout_keep_prob = dropout_keep_prob

    if conv_features <= 0:
        raise ValueError("Number of features must be a positive integer.")
    if encoder_layers <= 0:
        raise ValueError(
            "Number of encoder layers must be a positive integer.")

    log("Initializing convolutional seq2seq encoder, name {}".format(
        self.name))
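# A minimal sketch (separate from the class above; all names and sizes are
# illustrative assumptions) of the stacked 1-D convolutions this constructor
# configures: `conv_features` filters of width `kernel_width`, applied
# `encoder_layers` times over an embedded sequence.
import tensorflow as tf

embedded = tf.placeholder(tf.float32, [None, 20, 64])  # batch x time x emb
states = embedded
for layer in range(3):  # encoder_layers = 3
    with tf.variable_scope("conv_layer_{}".format(layer)):
        states = tf.layers.conv1d(
            states, filters=128, kernel_size=5,  # conv_features, kernel_width
            padding="same", activation=tf.nn.relu)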
def __init__(self,
             name: str,
             encoders: List[Any],
             attention_type: Type,
             attention_state_size: int,
             use_sentinels: bool = False,
             share_attn_projections: bool = False) -> None:
    """Initialize the encoder wrapper.

    Args:
        name: Name of the encoder / its scope.
        encoders: List of encoders to be wrapped.
        attention_type: Type of the attention combination.
        attention_state_size: Dimension of the state projection of the
            attention energy computation.
        use_sentinels: Flag whether the sentinel mechanism should be
            added to the attention combination.
        share_attn_projections: Flag whether the hidden state projection
            should be shared for both the energy computation and the
            context vector computation.
    """
    ModelPart.__init__(self, name, None, None)
    Attentive.__init__(self, attention_type)

    self.encoders = encoders
    self._attention_type = attention_type
    self._attention_state_size = attention_state_size
    self._use_sentinels = use_sentinels
    self._share_attn_projections = share_attn_projections

    self.encoded = tf.concat([e.encoded for e in encoders], 1)
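# Sketch of what the wrapper's `encoded` attribute holds: the wrapped
# encoders' final states concatenated along the feature axis. Plain tensors
# stand in for encoder objects here; shapes are illustrative assumptions.
import tensorflow as tf

enc_a = tf.zeros([8, 256])  # final state of encoder A, batch size 8
enc_b = tf.zeros([8, 512])  # final state of encoder B
combined = tf.concat([enc_a, enc_b], 1)  # shape [8, 768]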
def __init__(self,
             name: str,
             input_sequence: Sequence,
             rnn_size: int,
             dropout_keep_prob: float = 1.0,
             rnn_cell: str = "GRU",
             attention_type: type = None,
             attention_state_size: int = None,
             attention_fertility: int = 3,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new instance of a recurrent encoder."""
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size,
                       attention_fertility=attention_fertility)
    check_argument_types()

    self.input_sequence = input_sequence
    self.rnn_size = rnn_size
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_cell_str = rnn_cell

    if self.rnn_size <= 0:
        raise ValueError("RNN size must be a positive integer.")
    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")
    if self.rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
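# A hypothetical shape of the `RNN_CELL_TYPES` lookup that the validation
# above consults (the mapping below is assumed, not copied from the original
# module): a cell-name string resolves to a TensorFlow cell class.
import tensorflow as tf

RNN_CELL_TYPES = {
    "GRU": tf.nn.rnn_cell.GRUCell,
    "LSTM": tf.nn.rnn_cell.LSTMCell,
}
cell = RNN_CELL_TYPES["GRU"](300)  # e.g. rnn_size = 300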
def __init__(self,
             name: str,
             max_input_len: int,
             vocabularies: List[Vocabulary],
             data_ids: List[str],
             embedding_sizes: List[int],
             rnn_size: int,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             **kwargs) -> None:
    """Construct a new instance of the factored encoder.

    Args:
        max_input_len: Maximum input length (longer sequences are trimmed).
        vocabularies: List of vocabularies, one for each data series.
        data_ids: List of data series IDs.
        embedding_sizes: List of embedding sizes for each data series.
        name: The name for this encoder. [sentence_encoder]
        rnn_size: The size of the hidden state.

    Keyword arguments:
        use_noisy_activations: Boolean flag whether to use noisy
            activation functions in RNN cells.
            (see neuralmonkey.nn.noisy_gru_cell) [False]
        attention_type: The attention to use. [None]
        attention_fertility: Fertility for CoverageAttention (if used). [3]
        dropout_keep_prob: Probability of keeping a neuron active in
            dropout, i.e. 1 minus the dropout rate. [1]
    """
    attention_type = kwargs.get("attention_type", None)
    Attentive.__init__(self, attention_type)
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    assert check_argument_types()

    self.vocabularies = vocabularies
    self.data_ids = data_ids
    self.embedding_sizes = embedding_sizes
    self.max_input_len = max_input_len
    self.rnn_size = rnn_size

    self.dropout_keep_prob = kwargs.get("dropout_keep_prob", 1)
    self.use_noisy_activations = kwargs.get("use_noisy_activations", False)

    log("Building encoder graph, name: '{}'.".format(self.name))
    with tf.variable_scope(self.name):
        self._create_encoder_graph()
    log("Encoder graph constructed.")
def __init__(self,
             name: str,
             max_input_len: int,
             vocabularies: List[Vocabulary],
             data_ids: List[str],
             embedding_sizes: List[int],
             rnn_size: int,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Construct a new instance of the factored encoder.

    Args:
        max_input_len: Maximum input length (longer sequences are trimmed).
        vocabularies: List of vocabularies, one for each data series.
        data_ids: List of data series IDs.
        embedding_sizes: List of embedding sizes for each data series.
        name: The name for this encoder. [sentence_encoder]
        rnn_size: The size of the hidden state.
        attention_type: The attention to use. [None]
        dropout_keep_prob: Probability of keeping a neuron active in
            dropout, i.e. 1 minus the dropout rate. [1]
    """
    Attentive.__init__(self, attention_type)
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    assert check_argument_types()

    self.vocabularies = vocabularies
    self.data_ids = data_ids
    self.embedding_sizes = embedding_sizes
    self.max_input_len = max_input_len
    self.rnn_size = rnn_size
    self.dropout_keep_prob = dropout_keep_prob

    log("Building encoder graph, name: '{}'.".format(self.name))
    with self.use_scope():
        self._create_encoder_graph()
    log("Encoder graph constructed.")
def __init__(self,
             name: str,
             input_shape: List[int],
             output_shape: int,
             data_id: str,
             dropout_keep_prob: float = 1.0,
             attention_type=None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    assert len(input_shape) == 3

    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.input_shape = input_shape
    self.output_shape = output_shape
    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob
    self.attention_type = attention_type

    with tf.variable_scope(self.name):
        self.dropout_placeholder = tf.placeholder(tf.float32)
        features_shape = [None] + input_shape  # type: ignore

        self.image_features = tf.placeholder(tf.float32,
                                             shape=features_shape,
                                             name="image_input")
        self.flat = tf.reduce_mean(self.image_features,
                                   axis=[1, 2],
                                   name="average_image")

        project_w = tf.get_variable(
            name="img_init_proj_W",
            shape=[input_shape[2], output_shape],
            initializer=tf.random_normal_initializer())
        project_b = tf.get_variable(
            name="img_init_b",
            shape=[output_shape],
            initializer=tf.zeros_initializer())

        self.encoded = tf.tanh(tf.matmul(self.flat, project_w) + project_b)

        self.__attention_tensor = tf.reshape(
            self.image_features,
            [-1, input_shape[0] * input_shape[1], input_shape[2]],
            name="flatten_image")
def __init__(self,
             name: str,
             input_shape: List[int],
             output_shape: int,
             data_id: str,
             attention_type: Callable = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)
    check_argument_types()

    assert len(input_shape) == 3
    if output_shape <= 0:
        raise ValueError("Output vector dimension must be positive.")

    self.data_id = data_id

    with self.use_scope():
        features_shape = [None] + input_shape  # type: ignore

        self.image_features = tf.placeholder(tf.float32,
                                             shape=features_shape,
                                             name="image_input")
        self.flat = tf.reduce_mean(self.image_features,
                                   axis=[1, 2],
                                   name="average_image")

        project_w = tf.get_variable(
            name="img_init_proj_W",
            shape=[input_shape[2], output_shape],
            initializer=tf.random_normal_initializer())
        project_b = tf.get_variable(
            name="img_init_b",
            shape=[output_shape],
            initializer=tf.zeros_initializer())

        self.encoded = tf.tanh(tf.matmul(self.flat, project_w) + project_b)

        self.__attention_tensor = tf.reshape(
            self.image_features,
            [-1, input_shape[0] * input_shape[1], input_shape[2]],
            name="flatten_image")
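# Minimal sketch of the projection built above: mean-pool a pre-extracted
# 8x8x2048 feature map over its spatial axes and project it through a tanh
# layer. The concrete shapes and variable names are illustrative assumptions.
import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 8, 8, 2048])
flat = tf.reduce_mean(features, axis=[1, 2])  # [batch, 2048]
proj_w = tf.get_variable("proj_w", [2048, 512])
proj_b = tf.get_variable("proj_b", [512],
                         initializer=tf.zeros_initializer())
encoded = tf.tanh(tf.matmul(flat, proj_w) + proj_b)  # [batch, 512]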
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Tuple[int, int, Optional[int]]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             fully_connected: Optional[List[int]] = None,
             dropout_keep_prob: float = 0.5,
             attention_type: Type = Attention,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize a convolutional network for image processing.

    Args:
        convolutions: Configuration of convolutional layers. It is a list
            of triplets of integers where the values are: size of the
            convolutional window, number of convolutional filters, and
            size of max-pooling window. If the max-pooling size is set
            to None, no pooling is performed.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the image.
        pixel_dim: Number of color channels in the input images.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout. Dropout is done between all convolutional layers
            and the fully connected layer.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob
    self.image_height = image_height
    self.image_width = image_width
    self.pixel_dim = pixel_dim
    self.convolutions = convolutions
    self.fully_connected = fully_connected
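# An example of the `convolutions` specification described in the docstring
# above: triplets of (window size, number of filters, max-pooling window),
# with None disabling pooling for that layer. The concrete values here are
# illustrative only.
convolutions = [
    (3, 32, 2),     # 3x3 window, 32 filters, 2x2 max-pooling
    (3, 64, None),  # 3x3 window, 64 filters, no pooling
]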
def __init__(self,
             name: str,
             data_id: str,
             network_type: str,
             output_layer: str,
             attention_type: Type = Attention,
             fine_tune: bool = False,
             load_checkpoint: Optional[str] = None,
             save_checkpoint: Optional[str] = None) -> None:
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.data_id = data_id
    self._network_type = network_type
    self.input_plc = tf.placeholder(tf.float32,
                                    [None, self.HEIGHT, self.WIDTH, 3])

    if network_type not in SUPPORTED_NETWORKS:
        raise ValueError(
            "Network '{}' is not among the supported ones ({})".format(
                network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

    scope, net_function = SUPPORTED_NETWORKS[network_type]
    with tf_slim.arg_scope(scope()):
        _, end_points = net_function(self.input_plc)

    with tf.variable_scope(self.name):
        net_output = end_points[output_layer]
        if not fine_tune:
            net_output = tf.stop_gradient(net_output)
        # pylint: disable=no-member
        shape = [s.value for s in net_output.get_shape()[1:]]
        # pylint: enable=no-member
        self.__attention_tensor = tf.reshape(
            net_output, [-1, shape[0] * shape[1], shape[2]])

        self.encoded = tf.reduce_mean(net_output, [1, 2])
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             segment_size: int,
             highway_depth: int,
             rnn_size: int,
             filters: List[Tuple[int, int]],
             max_input_len: Optional[int] = None,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             attention_fertility: int = 3,
             use_noisy_activations: bool = False,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a new instance of the sentence encoder.

    Arguments:
        vocabulary: Input vocabulary
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        max_input_len: Maximum length of an encoded sequence
        embedding_size: The size of the embedding vector assigned
            to each word
        segment_size: The size of the segments over which we apply
            max-pooling.
        highway_depth: Depth of the highway layer.
        rnn_size: The size of the encoder's hidden state. Note
            that the actual encoder output state size will be
            twice as long because it is the result of
            concatenation of forward and backward hidden states.
        filters: Specification of CNN filters. It is a list of tuples
            specifying the filter size and number of channels.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
        attention_fertility: Fertility parameter used with
            CoverageAttention (default 3).
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_fertility=attention_fertility)
    check_argument_types()

    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_input_len = max_input_len
    self.embedding_size = embedding_size
    self.segment_size = segment_size
    self.highway_depth = highway_depth
    self.rnn_size = rnn_size
    self.filters = filters
    self.dropout_keep_prob = dropout_keep_prob
    self.use_noisy_activations = use_noisy_activations

    if dropout_keep_prob <= 0. or dropout_keep_prob > 1.:
        raise ValueError(("Dropout keep probability must be "
                          "in (0; 1], was {}").format(dropout_keep_prob))
    if max_input_len is not None and max_input_len <= 0:
        raise ValueError("Input length must be a positive integer.")
    if embedding_size <= 0:
        raise ValueError("Embedding size must be a positive integer.")
    if rnn_size <= 0:
        raise ValueError("RNN size must be a positive integer.")
    if highway_depth <= 0:
        raise ValueError("Highway depth must be a positive integer.")
    if segment_size <= 0:
        raise ValueError("Segment size must be a positive integer.")
    if not filters:
        raise ValueError("You must specify convolutional filters.")

    for filter_size, num_filters in self.filters:
        if filter_size <= 0:
            raise ValueError("Filter size must be a positive integer.")
        if num_filters <= 0:
            raise ValueError("Number of filters must be a positive int.")
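# An example of the `filters` specification from the docstring above: a list
# of (filter size, number of channels) tuples. The values are illustrative.
filters = [
    (3, 100),  # width-3 filters, 100 channels
    (4, 100),  # width-4 filters, 100 channels
    (5, 150),  # width-5 filters, 150 channels
]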
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Tuple[int, int, Optional[int]]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             fully_connected: Optional[List[int]] = None,
             batch_normalization: bool = True,
             local_response_normalization: bool = True,
             dropout_keep_prob: float = 0.5,
             attention_type: Type = Attention,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize a convolutional network for image processing.

    Args:
        convolutions: Configuration of convolutional layers. It is a list
            of triplets of integers where the values are: size of the
            convolutional window, number of convolutional filters, and
            size of max-pooling window. If the max-pooling size is set
            to None, no pooling is performed.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the image.
        pixel_dim: Number of color channels in the input images.
        batch_normalization: Flag whether the batch normalization
            should be used between the convolutional layers.
        local_response_normalization: Flag whether to use local response
            normalization between the convolutional layers.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout. Dropout is done between all convolutional layers
            and the fully connected layer.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    with self.use_scope():
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  name="dropout")
        self.train_mode = tf.placeholder(tf.bool, shape=[],
                                         name="mode_placeholder")
        self.input_op = tf.placeholder(tf.float32,
                                       shape=(None, image_height,
                                              image_width, pixel_dim),
                                       name="input_images")
        self.padding_masks = tf.placeholder(tf.float32,
                                            shape=(None, image_height,
                                                   image_width, 1),
                                            name="padding_masks")

        last_layer = self.input_op
        last_padding_masks = self.padding_masks

        self.image_processing_layers = []  # type: List[tf.Tensor]

        with tf.variable_scope("convolutions"):
            for i, (filter_size, n_filters,
                    pool_size) in enumerate(convolutions):
                with tf.variable_scope("cnn_layer_{}".format(i)):
                    last_layer = conv2d(last_layer, n_filters, filter_size)
                    self.image_processing_layers.append(last_layer)

                    if pool_size:
                        last_layer = max_pool2d(last_layer, pool_size)
                        self.image_processing_layers.append(last_layer)
                        last_padding_masks = max_pool2d(
                            last_padding_masks, pool_size)

                    if local_response_normalization:
                        last_layer = tf.nn.local_response_normalization(
                            last_layer)

                    if batch_normalization:
                        last_layer = batch_norm(
                            last_layer, is_training=self.train_mode)

                    last_layer = dropout(last_layer, dropout_keep_prob,
                                         self.train_mode)

            # last_layer shape is batch X height X width X channels
            last_layer = last_layer * last_padding_masks

        # pylint: disable=no-member
        last_height, last_width, last_n_channels = [
            s.value for s in last_layer.get_shape()[1:]]
        # pylint: enable=no-member

        if fully_connected is None:
            # we average out by the image size -> shape is number
            # of channels from the last convolution
            self.encoded = tf.reduce_mean(last_layer, [1, 2])
            assert_shape(self.encoded, [None, convolutions[-1][1]])
        else:
            last_layer_flat = tf.reshape(
                last_layer,
                [-1, last_width * last_height * last_n_channels])
            self.encoded = multilayer_projection(
                last_layer_flat, fully_connected,
                activation=tf.nn.relu,
                dropout_plc=self.dropout_placeholder)

        self.__attention_tensor = tf.reshape(
            last_layer, [-1, last_width * last_height, last_n_channels])

        self.__attention_mask = tf.reshape(
            last_padding_masks, [-1, last_width * last_height])
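# Shape bookkeeping for the attention tensor built above: a feature map of
# [batch, H, W, C] is flattened to [batch, H*W, C], so each spatial position
# becomes one attendable state; the single-channel mask flattens the same
# way. Toy numbers, assumed for illustration.
import tensorflow as tf

feature_map = tf.zeros([4, 6, 10, 64])                   # batch, H, W, C
attn_tensor = tf.reshape(feature_map, [-1, 6 * 10, 64])  # [4, 60, 64]
mask = tf.ones([4, 6, 10, 1])
attn_mask = tf.reshape(mask, [-1, 6 * 10])               # [4, 60]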
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             rnn_size: int,
             attention_state_size: int = None,
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             rnn_cell: str = "GRU",
             attention_type: type = None,
             attention_fertility: int = 3,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a new instance of the sentence encoder.

    Arguments:
        vocabulary: Input vocabulary
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        max_input_len: Maximum length of an encoded sequence
        embedding_size: The size of the embedding vector assigned
            to each word
        rnn_size: The size of the encoder's hidden state. Note
            that the actual encoder output state size will be
            twice as long because it is the result of
            concatenation of forward and backward hidden states.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
        attention_state_size: The size of the attention inner state. If
            None, the size of the encoder hidden state is used.
            (default None)
        attention_fertility: Fertility parameter used with
            CoverageAttention (default 3).
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size,
                       attention_fertility=attention_fertility)
    check_argument_types()

    self.vocabulary = vocabulary
    self.vocabulary_size = len(self.vocabulary)
    self.data_id = data_id
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.max_input_len = max_input_len
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_cell_str = rnn_cell

    if self.max_input_len is not None and self.max_input_len <= 0:
        raise ValueError("Input length must be a positive integer.")
    if self.embedding_size <= 0:
        raise ValueError("Embedding size must be a positive integer.")
    if self.rnn_size <= 0:
        raise ValueError("RNN size must be a positive integer.")
    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")
    if self.rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
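# A sketch of why the encoder output is twice `rnn_size`, as the docstring
# notes: forward and backward outputs of a bidirectional RNN are concatenated
# along the feature axis. Shapes and sizes below are assumed for
# illustration.
import tensorflow as tf

inputs = tf.zeros([4, 15, 64])    # batch, time, embedding
lengths = tf.fill([4], 15)
fw = tf.nn.rnn_cell.GRUCell(300)  # rnn_size = 300
bw = tf.nn.rnn_cell.GRUCell(300)
outputs, finals = tf.nn.bidirectional_dynamic_rnn(
    fw, bw, inputs, sequence_length=lengths, dtype=tf.float32)
hidden_states = tf.concat(outputs, 2)  # [4, 15, 600]
encoded = tf.concat(finals, 1)         # [4, 600]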
def __init__(self,
             name: str,
             data_id: str,
             rnn_size: int,
             input_dimension: int,
             max_input_len: Optional[int] = None,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a new instance of the encoder.

    Arguments:
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        rnn_size: The size of the encoder's hidden state. Note
            that the actual encoder output state size will be
            twice as long because it is the result of
            concatenation of forward and backward hidden states.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)
    assert check_argument_types()

    self.data_id = data_id

    self.rnn_size = rnn_size
    self.max_input_len = max_input_len
    self.input_dimension = input_dimension
    self.dropout_keep_p = dropout_keep_prob

    log("Initializing RNN encoder, name: '{}'".format(self.name))

    with tf.variable_scope(self.name):
        self._create_input_placeholders()
        self._input_mask = tf.sequence_mask(self._input_lengths,
                                            dtype=tf.float32)

        fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
        outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, self.inputs, self._input_lengths,
            dtype=tf.float32)

        self.hidden_states = tf.concat(outputs_bidi_tup, 2)

        with tf.variable_scope('attention_tensor'):
            self.__attention_tensor = dropout(self.hidden_states,
                                              self.dropout_keep_p,
                                              self.train_mode)

        self.encoded = tf.concat(encoded_tup, 1)

    log("RNN encoder initialized")
def __init__(self,
             name: str,
             vocabularies: List[Vocabulary],
             data_ids: List[str],
             embedding_sizes: List[int],
             rnn_size: int,
             attention_state_size: int = None,
             max_input_len: int = None,
             dropout_keep_prob: float = 1.0,
             rnn_cell: str = "GRU",
             attention_type: type = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Construct a new instance of the factored encoder.

    Args:
        vocabularies: List of vocabularies, one for each data series.
        data_ids: List of data series IDs.
        embedding_sizes: List of embedding sizes for each data series.
        name: The name for this encoder.
        rnn_size: The size of the hidden state.

    Keyword arguments:
        attention_state_size: The size of the attention hidden state
        max_input_len: Maximum input length (longer sequences are trimmed)
        attention_type: The attention to use.
        dropout_keep_prob: Dropout keep probability
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size)
    check_argument_types()

    self.vocabularies = vocabularies
    self.vocabulary_sizes = [len(voc) for voc in self.vocabularies]
    self.data_ids = data_ids
    self.embedding_sizes = embedding_sizes
    self.rnn_size = rnn_size
    self.max_input_len = max_input_len
    self.dropout_keep_prob = dropout_keep_prob
    self.rnn_cell_str = rnn_cell

    if not (len(self.data_ids)
            == len(self.vocabularies)
            == len(self.embedding_sizes)):
        raise ValueError("data_ids, vocabularies, and embedding_sizes "
                         "lists need to have the same length")

    if max_input_len is not None and max_input_len <= 0:
        raise ValueError("Input length must be a positive integer.")

    if any([esize <= 0 for esize in embedding_sizes]):
        raise ValueError("Embedding size must be a positive integer.")

    if rnn_size <= 0:
        raise ValueError("RNN size must be a positive integer.")

    if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
        raise ValueError("Dropout keep prob must be inside (0,1].")

    if self.rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be either 'GRU' or 'LSTM'.")
def __init__(self,
             name: str,
             data_id: str,
             network_type: str,
             attention_layer: Optional[str] = None,
             attention_state_size: Optional[int] = None,
             attention_type: Type = Attention,
             fine_tune: bool = False,
             encoded_layer: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             save_checkpoint: Optional[str] = None) -> None:
    """Initialize pre-trained ImageNet network.

    Args:
        name: Name of the model part (the ImageNet network will be in its
            own scope, independently of `name`).
        data_id: Id of series with images (list of 3D numpy arrays)
        network_type: Identifier of ImageNet network from TFSlim.
        attention_layer: String identifier of the convolutional map
            (model's endpoint) that will be used for attention. Check
            the TFSlim documentation for end point specifications.
        attention_state_size: Dimensionality of the state projection in
            attention computation.
        attention_type: Type of attention.
        fine_tune: Flag whether the network should be further trained with
            the rest of the model.
        encoded_layer: String id of the network layer that will be used as
            input of a decoder. `None` means averaging the convolutional
            maps.
        load_checkpoint: Checkpoint file from which the pre-trained network
            is loaded.
        save_checkpoint: Checkpoint file where the encoder is saved after
            the training. (Makes sense only if `fine_tune` is set to
            `True`.)
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size)

    if attention_layer is None and attention_type is not None:
        raise ValueError("Attention type is set, although "
                         "attention layer is not specified.")

    if save_checkpoint is not None and not fine_tune:
        warn("The ImageNet network is not fine-tuned and still it is set "
             "to save after the training is finished.")

    self.data_id = data_id
    self.network_type = network_type
    self.attention_layer = attention_layer
    self.encoded_layer = encoded_layer
    self.fine_tune = fine_tune

    if self.network_type not in SUPPORTED_NETWORKS:
        raise ValueError(
            "Network '{}' is not among the supported ones ({})".format(
                self.network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

    scope, net_function = SUPPORTED_NETWORKS[self.network_type]
    with tf_slim.arg_scope(scope()):
        _, self.end_points = net_function(self.input_image)

    if (self.attention_layer is not None
            and self.attention_layer not in self.end_points):
        raise ValueError(
            "Network '{}' does not contain endpoint '{}'".format(
                self.network_type, self.attention_layer))

    if attention_layer is not None:
        net_output = self.end_points[self.attention_layer]

        if len(net_output.get_shape()) != 4:
            raise ValueError(
                ("Endpoint '{}' of network '{}' is not a convolutional "
                 "map, its shape is: {}.").format(
                     self.attention_layer, self.network_type,
                     ", ".join([str(d.value)
                                for d in net_output.get_shape()])))

    if (self.encoded_layer is not None
            and self.encoded_layer not in self.end_points):
        raise ValueError(
            "Network '{}' does not contain endpoint '{}'.".format(
                self.network_type, self.encoded_layer))
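# A hypothetical shape of the `SUPPORTED_NETWORKS` table consulted above:
# each network identifier maps to a (slim arg-scope factory, net function)
# pair, which is how the constructor unpacks it. The two entries and the
# slim imports below are assumptions for illustration, not the original
# table.
from tensorflow.contrib.slim.nets import resnet_v2, vgg

SUPPORTED_NETWORKS = {
    "vgg_16": (vgg.vgg_arg_scope, vgg.vgg_16),
    "resnet_v2_50": (resnet_v2.resnet_arg_scope, resnet_v2.resnet_v2_50),
}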
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             rnn_size: int,
             max_input_len: Optional[int] = None,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             attention_fertility: int = 3,
             use_noisy_activations: bool = False,
             parent_encoder: Optional["SentenceEncoder"] = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a new instance of the sentence encoder.

    Arguments:
        vocabulary: Input vocabulary
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        max_input_len: Maximum length of an encoded sequence
        embedding_size: The size of the embedding vector assigned
            to each word
        rnn_size: The size of the encoder's hidden state. Note
            that the actual encoder output state size will be
            twice as long because it is the result of
            concatenation of forward and backward hidden states.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
        attention_fertility: Fertility parameter used with
            CoverageAttention (default 3).
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(
        self, attention_type, attention_fertility=attention_fertility)
    assert check_argument_types()

    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_input_len = max_input_len
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.dropout_keep_p = dropout_keep_prob
    self.use_noisy_activations = use_noisy_activations
    self.parent_encoder = parent_encoder

    if max_input_len is not None and max_input_len <= 0:
        raise ValueError("Input length must be a positive integer.")

    log("Initializing sentence encoder, name: '{}'".format(self.name))

    with self.use_scope():
        self._create_input_placeholders()
        with tf.variable_scope('input_projection'):
            self._create_embedding_matrix()
            embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
            self.embedded_inputs = embedded_inputs

        fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple
        outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, embedded_inputs,
            sequence_length=self.sentence_lengths,
            dtype=tf.float32)

        self.hidden_states = tf.concat(outputs_bidi_tup, 2)

        with tf.variable_scope('attention_tensor'):
            self.__attention_tensor = self._dropout(self.hidden_states)

        self.encoded = tf.concat(encoded_tup, 1)

    log("Sentence encoder initialized")
def __init__(self,
             name: str,
             data_id: str,
             network_type: str,
             attention_layer: Optional[str],
             attention_state_size: int,
             attention_type: Type = Attention,
             fine_tune: bool = False,
             encoded_layer: Optional[str] = None,
             load_checkpoint: Optional[str] = None,
             save_checkpoint: Optional[str] = None) -> None:
    """Initialize pre-trained ImageNet network.

    Args:
        name: Name of the model part (the ImageNet network will be in its
            own scope, independently of `name`).
        data_id: Id of series with images (list of 3D numpy arrays)
        network_type: Identifier of ImageNet network from TFSlim.
        attention_layer: String identifier of the convolutional map
            (model's endpoint) that will be used for attention. Check
            the TFSlim documentation for end point specifications.
        attention_state_size: Dimensionality of the state projection in
            attention computation.
        attention_type: Type of attention.
        fine_tune: Flag whether the network should be further trained with
            the rest of the model.
        encoded_layer: String id of the network layer that will be used as
            input of a decoder. `None` means averaging the convolutional
            maps.
        load_checkpoint: Checkpoint file from which the pre-trained network
            is loaded.
        save_checkpoint: Checkpoint file where the encoder is saved after
            the training. (Makes sense only if `fine_tune` is set to
            `True`.)
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_state_size=attention_state_size)

    if attention_layer is None and attention_type is not None:
        raise ValueError("Attention type is set, although "
                         "attention layer is not specified.")

    if save_checkpoint is not None and not fine_tune:
        warn("The ImageNet network is not fine-tuned and still it is set "
             "to save after the training is finished.")

    self.data_id = data_id
    self._network_type = network_type
    self.input_plc = tf.placeholder(
        tf.float32, [None, self.HEIGHT, self.WIDTH, 3])

    if network_type not in SUPPORTED_NETWORKS:
        raise ValueError(
            "Network '{}' is not among the supported ones ({})".format(
                network_type, ", ".join(SUPPORTED_NETWORKS.keys())))

    scope, net_function = SUPPORTED_NETWORKS[network_type]
    with tf_slim.arg_scope(scope()):
        _, end_points = net_function(self.input_plc)

    with tf.variable_scope(self.name):
        if attention_layer is not None:
            if attention_layer not in end_points:
                raise ValueError(
                    "Network '{}' does not contain endpoint '{}'".format(
                        network_type, attention_layer))

            net_output = end_points[attention_layer]

            if len(net_output.get_shape()) != 4:
                raise ValueError(
                    ("Endpoint '{}' of network '{}' is not a "
                     "convolutional map, its shape is: {}.").format(
                         attention_layer, network_type,
                         ", ".join([str(d.value)
                                    for d in net_output.get_shape()])))

            if not fine_tune:
                net_output = tf.stop_gradient(net_output)

            # pylint: disable=no-member
            shape = [s.value for s in net_output.get_shape()[1:]]
            # pylint: enable=no-member
            self.__attention_tensor = tf.reshape(
                net_output, [-1, shape[0] * shape[1], shape[2]])

        if encoded_layer is not None:
            if encoded_layer not in end_points:
                raise ValueError(
                    "Network '{}' does not contain endpoint '{}'.".format(
                        network_type, encoded_layer))

            self.encoded = tf.squeeze(end_points[encoded_layer], [1, 2])
            if not fine_tune:
                self.encoded = tf.stop_gradient(self.encoded)
        else:
            self.encoded = tf.reduce_mean(net_output, [1, 2])
def __init__(self,
             name: str,
             data_id: str,
             input_size: int,
             rnn_layers: List[RNNSpecTuple],
             max_input_len: Optional[int] = None,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a new instance of the encoder.

    Arguments:
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        rnn_layers: A list of tuples specifying the size and, optionally,
            the direction ('forward', 'backward' or 'both') and cell type
            ('GRU' or 'LSTM') of each RNN layer.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)
    check_argument_types()

    self.data_id = data_id

    self._rnn_layers = [_make_rnn_spec(*r) for r in rnn_layers]
    self.max_input_len = max_input_len
    self.input_size = input_size
    self.dropout_keep_prob = dropout_keep_prob

    log("Initializing RNN encoder, name: '{}'".format(self.name))

    with self.use_scope():
        self._create_input_placeholders()
        self.states_mask = tf.sequence_mask(self._input_lengths,
                                            dtype=tf.float32)

        states = self.inputs
        states_reversed = False

        def reverse_states():
            nonlocal states, states_reversed
            states = tf.reverse_sequence(states, self._input_lengths,
                                         batch_dim=0, seq_dim=1)
            states_reversed = not states_reversed

        for i, layer in enumerate(self._rnn_layers):
            with tf.variable_scope('rnn_{}_{}'.format(i, layer.direction)):
                cell = _make_rnn_cell(layer)
                if layer.direction == 'both':
                    outputs_tup, encoded_tup = (
                        tf.nn.bidirectional_dynamic_rnn(
                            cell(), cell(), states, self._input_lengths,
                            dtype=tf.float32))

                    if states_reversed:
                        # treat forward as backward and vice versa
                        outputs_tup = tuple(reversed(outputs_tup))
                        encoded_tup = tuple(reversed(encoded_tup))
                        states_reversed = False

                    states = tf.concat(outputs_tup, 2)
                    encoded = tf.concat(encoded_tup, 1)
                elif layer.direction in ['forward', 'backward']:
                    should_be_reversed = (layer.direction == 'backward')
                    if states_reversed != should_be_reversed:
                        reverse_states()

                    states, encoded = tf.nn.dynamic_rnn(
                        cell(), states,
                        sequence_length=self._input_lengths,
                        dtype=tf.float32)
                else:
                    raise ValueError("Unknown RNN direction {}".format(
                        layer.direction))

            if i < len(self._rnn_layers) - 1:
                states = dropout(states, self.dropout_keep_prob,
                                 self.train_mode)

        if states_reversed:
            reverse_states()

        self.hidden_states = states
        self.encoded = encoded

        with tf.variable_scope('attention_tensor'):
            self.__attention_tensor = dropout(self.hidden_states,
                                              self.dropout_keep_prob,
                                              self.train_mode)

    log("RNN encoder initialized")
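# A small sketch of the sequence-reversal trick used for 'backward' layers
# above: each sequence is reversed only up to its true length, so a forward
# RNN over the reversed tensor acts as a backward RNN. Values below are toy
# examples, assumed for illustration.
import tensorflow as tf

states = tf.constant([[[1.], [2.], [3.], [0.]]])  # batch 1, len 4, padded
lengths = tf.constant([3])                        # only 3 valid steps
reversed_states = tf.reverse_sequence(states, lengths,
                                      batch_dim=0, seq_dim=1)
# -> [[[3.], [2.], [1.], [0.]]]; the padding position stays in place.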
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Tuple[int, int, Optional[int]]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             batch_normalization: bool = True,
             local_response_normalization: bool = True,
             dropout_keep_prob: float = 0.5,
             attention_type: Type = Attention,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize a convolutional network for image processing.

    Args:
        convolutions: Configuration of convolutional layers. It is a list
            of triplets of integers where the values are: size of the
            convolutional window, number of convolutional filters, and
            size of max-pooling window. If the max-pooling size is set
            to None, no pooling is performed.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the images (padded).
        pixel_dim: Number of color channels in the input images.
        batch_normalization: Flag whether the batch normalization
            should be used between the convolutional layers.
        local_response_normalization: Flag whether to use local response
            normalization between the convolutional layers.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.convolutions = convolutions
    self.data_id = data_id
    self.image_height = image_height
    self.image_width = image_width
    self.pixel_dim = pixel_dim
    self.dropout_keep_prob = dropout_keep_prob

    with tf.variable_scope(name):
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  name="dropout")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.input_op = tf.placeholder(tf.float32,
                                       shape=(None, image_height,
                                              image_width, pixel_dim),
                                       name="input_images")
        self.padding_masks = tf.placeholder(tf.float32,
                                            shape=(None, image_height,
                                                   image_width, 1),
                                            name="padding_masks")

        last_layer = self.input_op
        last_padding_masks = self.padding_masks
        last_n_channels = pixel_dim

        self.image_processing_layers = []  # type: List[tf.Tensor]

        with tf.variable_scope("convolutions"):
            for i, (filter_size, n_filters,
                    pool_size) in enumerate(convolutions):
                with tf.variable_scope("cnn_layer_{}".format(i)):
                    last_layer = _convolution(
                        last_layer, last_n_channels, filter_size,
                        n_filters)
                    last_n_channels = n_filters
                    self.image_processing_layers.append(last_layer)

                    if pool_size:
                        # TODO do the pooling properly
                        last_layer = tf.nn.max_pool(
                            last_layer, [1, 2, 2, 1], [1, 2, 2, 1],
                            "SAME")
                        last_padding_masks = tf.nn.max_pool(
                            last_padding_masks, [1, 2, 2, 1],
                            [1, 2, 2, 1], "SAME")
                        self.image_processing_layers.append(last_layer)
                        assert image_height % 2 == 0
                        image_height //= 2
                        assert image_width % 2 == 0
                        image_width //= 2

                    if local_response_normalization:
                        last_layer = tf.nn.local_response_normalization(
                            last_layer)

                    if batch_normalization:
                        last_layer = _batch_norm(last_layer, n_filters,
                                                 self.is_training)

                    last_layer = tf.nn.dropout(
                        last_layer, keep_prob=self.dropout_placeholder)

            # last_layer shape is batch X height X width X channels
            last_layer = last_layer * last_padding_masks

        # we average out by the image size -> shape is number
        # of channels from the last convolution
        self.encoded = tf.reduce_mean(last_layer, [1, 2])
        # TODO assert shape
        assert_shape(self.encoded, [None, self.convolutions[-1][1]])

        self.__attention_tensor = tf.reshape(
            last_layer,
            [-1, image_width, last_n_channels * image_height])

        self.__attention_mask = tf.squeeze(
            tf.reduce_prod(last_padding_masks, [1]), [2])
def __init__(self,
             name: str,
             vocabulary: Vocabulary,
             data_id: str,
             embedding_size: int,
             segment_size: int,
             highway_depth: int,
             rnn_size: int,
             filters: List[Tuple[int, int]],
             max_input_len: Optional[int] = None,
             dropout_keep_prob: float = 1.0,
             attention_type: Optional[Any] = None,
             attention_fertility: int = 3,
             use_noisy_activations: bool = False,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Create a new instance of the sentence encoder.

    Arguments:
        vocabulary: Input vocabulary
        data_id: Identifier of the data series fed to this encoder
        name: A unique identifier for this encoder
        max_input_len: Maximum length of an encoded sequence
        embedding_size: The size of the embedding vector assigned
            to each word
        segment_size: The size of the segments over which we apply
            max-pooling.
        highway_depth: Depth of the highway layer.
        rnn_size: The size of the encoder's hidden state. Note
            that the actual encoder output state size will be
            twice as long because it is the result of
            concatenation of forward and backward hidden states.
        filters: Specification of CNN filters. It is a list of tuples
            specifying the filter size and number of channels.

    Keyword arguments:
        dropout_keep_prob: The dropout keep probability
            (default 1.0)
        attention_type: The class that is used for creating
            attention mechanism (default None)
        attention_fertility: Fertility parameter used with
            CoverageAttention (default 3).
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type,
                       attention_fertility=attention_fertility)
    assert check_argument_types()

    self.vocabulary = vocabulary
    self.data_id = data_id

    self.max_input_len = max_input_len
    self.embedding_size = embedding_size
    self.segment_size = segment_size
    self.highway_depth = highway_depth
    self.rnn_size = rnn_size
    self.filters = filters
    self.dropout_keep_p = dropout_keep_prob
    self.use_noisy_activations = use_noisy_activations

    if max_input_len is not None and max_input_len <= 0:
        raise ValueError("Input length must be a positive integer.")

    log("Initializing sentence encoder, name: '{}'".format(self.name))

    with self.use_scope():
        self._create_input_placeholders()
        with tf.variable_scope('input_projection'):
            self._create_embedding_matrix()
            embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
            self.embedded_inputs = embedded_inputs

        # CNN network
        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                filter_shape = [filter_size, embedding_size, num_filters]
                w_filter = tf.get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.random_uniform_initializer(-0.5, 0.5))
                b_filter = tf.get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.constant_initializer(0.0))
                conv = tf.nn.conv1d(embedded_inputs, w_filter, stride=1,
                                    padding="SAME", name="conv")

                # apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # max-pooling over the output segments
                expanded_conv_relu = tf.expand_dims(conv_relu, -1)
                pooled = tf.nn.max_pool(
                    expanded_conv_relu,
                    ksize=[1, self.segment_size, 1, 1],
                    strides=[1, self.segment_size, 1, 1],
                    padding="SAME",
                    name="maxpool")
                pooled_outputs.append(pooled)

        # combine all the pooled features
        self.cnn_encoded = tf.concat(pooled_outputs, axis=2)
        self.cnn_encoded = tf.squeeze(self.cnn_encoded, [3])

        # highway network
        batch_size = tf.shape(self.cnn_encoded)[0]
        # pylint: disable=no-member
        cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
        # pylint: enable=no-member
        highway_layer = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
        for i in range(self.highway_depth):
            highway_layer = highway(highway_layer,
                                    scope=("highway_layer_%s" % i))
        highway_layer = tf.reshape(highway_layer,
                                   [batch_size, -1, cnn_out_size])

        # BiRNN network
        fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple

        seq_lens = tf.ceil(tf.divide(self.sentence_lengths,
                                     self.segment_size))
        seq_lens = tf.cast(seq_lens, tf.int32)

        outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, highway_layer,
            sequence_length=seq_lens,
            dtype=tf.float32)

        self.hidden_states = tf.concat(outputs_bidi_tup, 2)

        with tf.variable_scope('attention_tensor'):
            self.__attention_tensor = self._dropout(self.hidden_states)

        self.encoded = tf.concat(encoded_tup, 1)

    log("Sentence encoder initialized")
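# Segment arithmetic for the BiRNN input length above: after max-pooling
# with stride `segment_size` s, a sentence of L tokens yields ceil(L / s)
# segments, which is what the `seq_lens` computation produces. A quick,
# self-contained check (values assumed):
import math

segment_size = 4
for length in (7, 8, 9):
    print(length, "->", math.ceil(length / segment_size))  # 2, 2, 3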