def get_encoder_projections(self, scope: str) -> List[tf.Tensor]:
    encoder_projections = []
    with tf.variable_scope(scope):
        for i, encoder_tensor in enumerate(self._encoders_tensors):
            encoder_state_size = encoder_tensor.get_shape()[2].value
            encoder_tensor_shape = tf.shape(encoder_tensor)

            proj_matrix = get_variable(
                "proj_matrix_{}".format(i),
                [encoder_state_size, self.attention_state_size],
                initializer=tf.random_normal_initializer(stddev=0.001))

            proj_bias = get_variable(
                "proj_bias_{}".format(i),
                shape=[self.attention_state_size],
                initializer=tf.zeros_initializer())

            encoder_tensor_2d = tf.reshape(
                encoder_tensor, [-1, encoder_state_size])

            projected_2d = tf.matmul(
                encoder_tensor_2d, proj_matrix) + proj_bias
            assert_shape(projected_2d, [-1, self.attention_state_size])

            projection = tf.reshape(
                projected_2d,
                [encoder_tensor_shape[0], encoder_tensor_shape[1],
                 self.attention_state_size])

            encoder_projections.append(projection)
        return encoder_projections
def encoded(self) -> tf.Tensor:
    """Output vector of the CNN.

    If fully connected layers are specified, they are applied on top of
    the last convolutional map. Dropout is applied between all layers, the
    default activation function is ReLU. There are only projection layers,
    no softmax is applied.

    If no fully connected layer is specified, the average-pooled last
    convolutional map is used as the vector output.
    """
    # pylint: disable=no-member
    last_height, last_width, last_n_channels = [
        s.value for s in self.states.get_shape()[1:]]
    # pylint: enable=no-member

    if self.fully_connected is None:
        # we average out by the image size -> shape is number
        # channels from the last convolution
        encoded = tf.reduce_mean(self.states, [1, 2])
        assert_shape(encoded, [None, self.convolutions[-1][1]])
        return encoded

    states_flat = tf.reshape(
        self.states,
        [-1, last_width * last_height * last_n_channels])
    return multilayer_projection(
        states_flat, self.fully_connected,
        activation=tf.nn.relu,
        dropout_keep_prob=self.dropout_keep_prob,
        train_mode=self.train_mode)
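# The following is a hedged, self-contained sketch (not part of the encoder
# class) of the two output paths described in the docstring above. The
# placeholder shape, the 8x8x256 feature map and the 512-unit projection are
# illustrative assumptions only.
import tensorflow as tf

states = tf.placeholder(tf.float32, [None, 8, 8, 256])  # last convolutional map

# Path 1: no fully connected layers -> global average pooling over height and
# width, giving one vector of size 256 (the number of channels) per image.
pooled = tf.reduce_mean(states, [1, 2])  # shape [batch, 256]

# Path 2: fully connected layers specified -> flatten the map and project it.
states_flat = tf.reshape(states, [-1, 8 * 8 * 256])
projected = tf.layers.dense(states_flat, 512, activation=tf.nn.relu)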
def get_encoder_projections(self, scope):
    encoder_projections = []
    with tf.variable_scope(scope):
        for i, encoder_tensor in enumerate(self._encoders_tensors):
            encoder_state_size = encoder_tensor.get_shape()[2].value
            encoder_tensor_shape = tf.shape(encoder_tensor)

            proj_matrix = get_variable(
                "proj_matrix_{}".format(i),
                [encoder_state_size, self.attention_state_size],
                initializer=tf.random_normal_initializer(stddev=0.001))

            proj_bias = get_variable(
                "proj_bias_{}".format(i),
                shape=[self.attention_state_size],
                initializer=tf.zeros_initializer())

            encoder_tensor_2d = tf.reshape(
                encoder_tensor, [-1, encoder_state_size])

            projected_2d = tf.matmul(
                encoder_tensor_2d, proj_matrix) + proj_bias
            assert_shape(projected_2d, [-1, self.attention_state_size])

            projection = tf.reshape(projected_2d, [
                encoder_tensor_shape[0], encoder_tensor_shape[1],
                self.attention_state_size])

            encoder_projections.append(projection)
        return encoder_projections
def __call__(self, inputs, state, scope=None):
    output, new_state = self._cell(inputs, state)
    # self._mask is of shape [batch_size, state_size]
    # new_state is of shape [batch_size, state_size] (hopefully)
    new_state_dropped = new_state * self._scale * self._mask
    assert_shape(new_state_dropped, [None, self._cell.state_size])
    return output, new_state_dropped
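# Hedged usage sketch for the dropout wrapper defined by the __init__/__call__
# methods above. The wrapper class name "DropoutedCell" is hypothetical (the
# source does not show the class header); the GRU cell and sizes are
# assumptions. The idea: sample one Bernoulli mask per sequence, reuse it at
# every time step, and rescale by 1/keep_prob so the expected state magnitude
# stays unchanged.
import tensorflow as tf

keep_prob = 0.8
cell = tf.nn.rnn_cell.GRUCell(128)
batch_size = 32

# one mask per sequence, shared across time steps (variational dropout style)
mask = tf.cast(
    tf.random_uniform([batch_size, cell.state_size]) < keep_prob, tf.float32)
scale = 1.0 / keep_prob

# dropped_cell = DropoutedCell(cell, mask, scale)  # hypothetical class name
# outputs, final_state = tf.nn.dynamic_rnn(dropped_cell, inputs,
#                                          dtype=tf.float32)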
def _encoders_masks(self) -> List[tf.Tensor]:
    masks = [get_attention_mask(e) for e in self._encoders]
    for e_m in masks:
        assert_shape(e_m, [-1, -1])

    if self._use_sentinels:
        masks.append(tf.ones([tf.shape(masks[0])[0], 1]))
    return masks
def __init__(self,
             name: str,
             encoders: List[Attendable],
             attention_state_size: int,
             share_attn_projections: bool = False,
             use_sentinels: bool = False,
             reuse: ModelPart = None,
             save_checkpoint: str = None,
             load_checkpoint: str = None,
             initializers: InitializerSpecs = None) -> None:
    check_argument_types()
    MultiAttention.__init__(
        self,
        name=name,
        attention_state_size=attention_state_size,
        share_attn_projections=share_attn_projections,
        use_sentinels=use_sentinels,
        reuse=reuse,
        save_checkpoint=save_checkpoint,
        load_checkpoint=load_checkpoint,
        initializers=initializers)
    self._encoders = encoders

    # pylint: disable=protected-access
    self._encoders_tensors = [
        get_attention_states(e) for e in self._encoders]
    self._encoders_masks = [get_attention_mask(e) for e in self._encoders]
    # pylint: enable=protected-access

    for e_m in self._encoders_masks:
        assert_shape(e_m, [-1, -1])

    for e_t in self._encoders_tensors:
        assert_shape(e_t, [-1, -1, -1])

    with self.use_scope():
        self.encoder_projections_for_logits = \
            self.get_encoder_projections("logits_projections")

        self.encoder_attn_biases = [
            get_variable(name="attn_bias_{}".format(i),
                         shape=[],
                         initializer=tf.zeros_initializer())
            for i in range(len(self._encoders_tensors))]

        if self._share_projections:
            self.encoder_projections_for_ctx = \
                self.encoder_projections_for_logits
        else:
            self.encoder_projections_for_ctx = \
                self.get_encoder_projections("context_projections")

        if self._use_sentinels:
            self._encoders_masks.append(
                tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))

        self.masks_concat = tf.concat(self._encoders_masks, 1)
def attention(
        self,
        query: tf.Tensor,
        decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor,
        loop_state: AttentionLoopState) -> Tuple[
            tf.Tensor, AttentionLoopState]:
    with tf.variable_scope(self.att_scope_name):
        projected_state = tf.layers.dense(query, self.attention_state_size)
        projected_state = tf.expand_dims(projected_state, 1)

        assert_shape(projected_state, [-1, 1, self.attention_state_size])

        logits = []

        for proj, bias in zip(self.encoder_projections_for_logits,
                              self.encoder_attn_biases):
            logits.append(
                tf.reduce_sum(
                    self.attn_v * tf.tanh(projected_state + proj), [2])
                + bias)

        if self._use_sentinels:
            sentinel_value = _sentinel(query,
                                       decoder_prev_state,
                                       decoder_input)
            projected_sentinel, sentinel_logit = self._vector_logit(
                projected_state, sentinel_value, scope="sentinel")
            logits.append(sentinel_logit)

        attentions = self._renorm_softmax(tf.concat(logits, 1))

        self.attentions_in_time.append(attentions)

        if self._use_sentinels:
            tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                projected_sentinel)

            projections_concat = tf.concat(
                tiled_encoder_projections + [projected_sentinel], 1)
        else:
            projections_concat = tf.concat(
                self.encoder_projections_for_ctx, 1)

        contexts = tf.reduce_sum(
            tf.expand_dims(attentions, 2) * projections_concat, [1])

        next_contexts = tf.concat(
            [loop_state.contexts, tf.expand_dims(contexts, 0)], 0)
        next_weights = tf.concat(
            [loop_state.weights, tf.expand_dims(attentions, 0)], 0)

        next_loop_state = AttentionLoopState(contexts=next_contexts,
                                             weights=next_weights)

        return contexts, next_loop_state
def _sentinel(state, prev_state, input_):
    """Sentinel value given the decoder state."""
    with tf.variable_scope("sentinel"):
        decoder_state_size = state.get_shape()[-1].value
        st_with_inp = tf.concat([prev_state, input_], 1)

        gate = tf.nn.sigmoid(tf.layers.dense(st_with_inp, decoder_state_size))
        sentinel_value = gate * state

        assert_shape(sentinel_value, [-1, decoder_state_size])

        return sentinel_value
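# A minimal stand-alone sketch of the sentinel computation above, with
# hypothetical tensor sizes. The gate is a sigmoid projection of the previous
# decoder state concatenated with the current input, applied elementwise to
# the current state: g = sigmoid(W [h_prev; x] + b), sentinel = g * h.
import tensorflow as tf

h = tf.placeholder(tf.float32, [None, 256])       # current decoder state
h_prev = tf.placeholder(tf.float32, [None, 256])  # previous decoder state
x = tf.placeholder(tf.float32, [None, 64])        # current decoder input

gate = tf.nn.sigmoid(tf.layers.dense(tf.concat([h_prev, x], 1), 256))
sentinel = gate * h  # shape [batch, 256]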
def outputs_bidi(self):
    """Outputs of the bidirectional layer"""
    # outputs and outputs_rev, both lists in time of shape batch x rnn_size
    outputs_bidi = [
        tf.concat(1, [o1, o2])
        for o1, o2 in zip(self._outputs, self._outputs_rev)]

    # concatenations have shape batch x (2 * rnn_size)
    for out in outputs_bidi:
        assert_shape(out, [None, self._output_size])

    return outputs_bidi
def _convolution(last_layer: tf.Tensor, last_n_channels: int,
                 filter_size: int, n_filters: int) -> tf.Tensor:
    """Applies convolution on a filter bank."""
    conv_w = tf.get_variable(
        "weights",
        shape=[filter_size, filter_size, last_n_channels, n_filters],
        initializer=tf.truncated_normal_initializer(stddev=.1))
    conv_b = tf.get_variable(
        "biases", shape=[n_filters],
        initializer=tf.constant_initializer(.1))
    conv_activation = tf.nn.conv2d(
        last_layer, conv_w, [1, 1, 1, 1], "SAME") + conv_b
    # with "SAME" padding and stride 1, height and width are preserved and
    # the channel dimension becomes n_filters
    assert_shape(conv_activation, [
        None,
        last_layer.get_shape()[1].value,
        last_layer.get_shape()[2].value,
        n_filters])
    return tf.nn.relu(conv_activation)
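# Hedged usage sketch for the _convolution helper above; the input placeholder
# and all sizes are assumptions. With "SAME" padding and stride 1 the spatial
# dimensions are preserved, so the result has n_filters channels.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 32, 32, 3])
with tf.variable_scope("cnn_layer_0"):
    feature_map = _convolution(images, last_n_channels=3,
                               filter_size=3, n_filters=64)
# feature_map has static shape [None, 32, 32, 64]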
def attention(self, decoder_state, decoder_prev_state, decoder_input):
    with tf.variable_scope(self.scope):
        projected_state = linear(decoder_state, self.attention_state_size)
        projected_state = tf.expand_dims(projected_state, 1)

        assert_shape(projected_state, [-1, 1, self.attention_state_size])

        attn_ctx_vectors = [
            a.attention(decoder_state, decoder_prev_state, decoder_input)
            for a in self._attn_objs]

        proj_ctxs, attn_logits = [list(t) for t in zip(*[
            self._vector_logit(projected_state, ctx_vec, scope=enc.name)
            for ctx_vec, enc in zip(attn_ctx_vectors, self._encoders)])]

        if self._use_sentinels:
            sentinel_value = _sentinel(decoder_state,
                                       decoder_prev_state,
                                       decoder_input)
            proj_sentinel, sentinel_logit = self._vector_logit(
                projected_state, sentinel_value, scope="sentinel")
            proj_ctxs.append(proj_sentinel)
            attn_logits.append(sentinel_logit)

        attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
        self.attentions_in_time.append(attention_distr)

        if self._share_projections:
            output_cxts = proj_ctxs
        else:
            output_cxts = [
                tf.expand_dims(
                    linear(ctx_vec, self.attention_state_size,
                           scope="proj_attn_{}".format(enc.name)), 1)
                for ctx_vec, enc in zip(attn_ctx_vectors, self._encoders)]
            if self._use_sentinels:
                output_cxts.append(tf.expand_dims(
                    linear(sentinel_value, self.attention_state_size,
                           scope="proj_sentinel"), 1))

        projections_concat = tf.concat(output_cxts, 1)

        context = tf.reduce_sum(
            tf.expand_dims(attention_distr, 2) * projections_concat, [1])

        return context
def attention(self, decoder_state, decoder_prev_state, decoder_input):
    with tf.variable_scope(self.scope):
        projected_state = linear(decoder_state, self.attention_state_size)
        projected_state = tf.expand_dims(projected_state, 1)

        assert_shape(projected_state, [-1, 1, self.attention_state_size])

        logits = []

        for proj, bias in zip(self.encoder_projections_for_logits,
                              self.encoder_attn_biases):
            logits.append(tf.reduce_sum(
                self.attn_v * tf.tanh(projected_state + proj), [2]) + bias)

        if self._use_sentinels:
            sentinel_value = _sentinel(decoder_state,
                                       decoder_prev_state,
                                       decoder_input)
            projected_sentinel, sentinel_logit = self._vector_logit(
                projected_state, sentinel_value, scope="sentinel")
            logits.append(sentinel_logit)

        attentions = self._renorm_softmax(tf.concat(logits, 1))

        self.attentions_in_time.append(attentions)

        if self._use_sentinels:
            tiled_encoder_projections = self._tile_encoders_for_beamsearch(
                projected_sentinel)

            projections_concat = tf.concat(
                tiled_encoder_projections + [projected_sentinel], 1)
        else:
            projections_concat = tf.concat(
                self.encoder_projections_for_ctx, 1)

        contexts = tf.reduce_sum(
            tf.expand_dims(attentions, 2) * projections_concat, [1])

        return contexts
def _vector_logit(self,
                  projected_decoder_state: tf.Tensor,
                  vector_value: tf.Tensor,
                  scope: str) -> Tuple[tf.Tensor, tf.Tensor]:
    """Get logit for a single vector, e.g., sentinel vector."""
    assert_shape(projected_decoder_state, [-1, 1, -1])
    assert_shape(vector_value, [-1, -1])

    with tf.variable_scope("{}_logit".format(scope)):
        vector_bias = get_variable(
            "vector_bias", [],
            initializer=tf.zeros_initializer())

        proj_vector_for_logit = tf.expand_dims(
            tf.layers.dense(vector_value, self.attention_state_size,
                            name="vector_projection"), 1)

        if self._share_projections:
            proj_vector_for_ctx = proj_vector_for_logit
        else:
            proj_vector_for_ctx = tf.expand_dims(
                tf.layers.dense(vector_value, self.attention_state_size,
                                name="vector_ctx_proj"), 1)

        vector_logit = tf.reduce_sum(
            self.attn_v
            * tf.tanh(projected_decoder_state + proj_vector_for_logit),
            [2]) + vector_bias

        assert_shape(vector_logit, [-1, 1])
        return proj_vector_for_ctx, vector_logit
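# Sketch of the additive (Bahdanau-style) scoring used in _vector_logit and in
# the attention methods above: logit = v^T tanh(projected_state + projected_vec)
# + bias, reduced over the attention-state dimension. All sizes below are
# hypothetical.
import tensorflow as tf

attn_size = 128
projected_state = tf.placeholder(tf.float32, [None, 1, attn_size])
projected_vec = tf.placeholder(tf.float32, [None, 1, attn_size])
attn_v = tf.get_variable("attn_v", [1, 1, attn_size])
vector_bias = tf.get_variable("vector_bias", [],
                              initializer=tf.zeros_initializer())

logit = tf.reduce_sum(
    attn_v * tf.tanh(projected_state + projected_vec), [2]) + vector_bias
# logit has shape [batch, 1]; concatenating such logits over encoders and
# applying a (renormalized) softmax yields the attention distribution.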
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    # pylint: disable=protected-access
    self._encoders_tensors = [e._attention_tensor for e in self._encoders]
    self._encoders_masks = [e._attention_mask for e in self._encoders]
    # pylint: enable=protected-access

    for e_m in self._encoders_masks:
        assert_shape(e_m, [-1, -1])

    for e_t in self._encoders_tensors:
        assert_shape(e_t, [-1, -1, -1])

    with tf.variable_scope(self.scope):
        self.encoder_projections_for_logits = \
            self.get_encoder_projections("logits_projections")

        self.encoder_attn_biases = [
            tf.get_variable(name="attn_bias_{}".format(i),
                            shape=[],
                            initializer=tf.constant_initializer(0.))
            for i in range(len(self._encoders_tensors))]

        if self._share_projections:
            self.encoder_projections_for_ctx = \
                self.encoder_projections_for_logits
        else:
            self.encoder_projections_for_ctx = \
                self.get_encoder_projections("context_projections")

        if self._use_sentinels:
            self._encoders_masks.append(
                tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))

        self.masks_concat = tf.concat(self._encoders_masks, 1)
def attention(
        self,
        query: tf.Tensor,
        decoder_prev_state: tf.Tensor,
        decoder_input: tf.Tensor,
        loop_state: HierarchicalLoopState) -> Tuple[
            tf.Tensor, HierarchicalLoopState]:
    with tf.variable_scope(self.att_scope_name):
        projected_state = tf.layers.dense(query, self.attention_state_size)
        projected_state = tf.expand_dims(projected_state, 1)

        assert_shape(projected_state, [-1, 1, self.attention_state_size])

        attn_ctx_vectors, child_loop_states = zip(*[
            a.attention(query, decoder_prev_state, decoder_input, ls)
            for a, ls in zip(self.attentions, loop_state.child_loop_states)])

        proj_ctxs, attn_logits = [list(t) for t in zip(*[
            self._vector_logit(projected_state,
                               ctx_vec, scope=att.name)  # type: ignore
            for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)])]

        if self._use_sentinels:
            sentinel_value = _sentinel(query,
                                       decoder_prev_state,
                                       decoder_input)
            proj_sentinel, sentinel_logit = self._vector_logit(
                projected_state, sentinel_value, scope="sentinel")
            proj_ctxs.append(proj_sentinel)
            attn_logits.append(sentinel_logit)

        attention_distr = tf.nn.softmax(tf.concat(attn_logits, 1))
        self.attentions_in_time.append(attention_distr)

        if self._share_projections:
            output_cxts = proj_ctxs
        else:
            output_cxts = [
                tf.expand_dims(
                    tf.layers.dense(ctx_vec, self.attention_state_size,
                                    name="proj_attn_{}".format(att.name)),
                    1)  # type: ignore
                for ctx_vec, att in zip(attn_ctx_vectors, self.attentions)]
            if self._use_sentinels:
                output_cxts.append(
                    tf.expand_dims(
                        tf.layers.dense(sentinel_value,
                                        self.attention_state_size,
                                        name="proj_sentinel"), 1))

        projections_concat = tf.concat(output_cxts, 1)

        context = tf.reduce_sum(
            tf.expand_dims(attention_distr, 2) * projections_concat, [1])

        prev_loop_state = loop_state.loop_state

        next_contexts = tf.concat(
            [prev_loop_state.contexts, tf.expand_dims(context, 0)], axis=0)
        next_weights = tf.concat(
            [prev_loop_state.weights, tf.expand_dims(attention_distr, 0)],
            axis=0)

        next_loop_state = AttentionLoopState(contexts=next_contexts,
                                             weights=next_weights)

        next_hier_loop_state = HierarchicalLoopState(
            child_loop_states=list(child_loop_states),
            loop_state=next_loop_state)

        return context, next_hier_loop_state
def __init__(self, cell, mask, scale) -> None:
    self._cell = cell
    self._mask = mask
    assert_shape(mask, [None, cell.state_size])
    self._scale = scale
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Tuple[int, int, Optional[int]]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             fully_connected: Optional[List[int]] = None,
             batch_normalization: bool = True,
             local_response_normalization: bool = True,
             dropout_keep_prob: float = 0.5,
             attention_type: Type = Attention,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize a convolutional network for image processing.

    Args:
        convolutions: Configuration of convolutional layers. It is a list
            of triplets of integers where the values are: size of the
            convolutional window, number of convolutional filters, and
            size of max-pooling window. If the max-pooling size is set to
            None, no pooling is performed.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the image.
        pixel_dim: Number of color channels in the input images.
        batch_normalization: Flag whether the batch normalization should
            be used between the convolutional layers.
        local_response_normalization: Flag whether to use local response
            normalization between the convolutional layers.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout. Dropout is done between all convolutional layers and
            fully connected layer.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.data_id = data_id
    self.dropout_keep_prob = dropout_keep_prob

    with self.use_scope():
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  name="dropout")
        self.train_mode = tf.placeholder(tf.bool, shape=[],
                                         name="mode_placeholder")
        self.input_op = tf.placeholder(tf.float32,
                                       shape=(None, image_height,
                                              image_width, pixel_dim),
                                       name="input_images")

        self.padding_masks = tf.placeholder(tf.float32,
                                            shape=(None, image_height,
                                                   image_width, 1),
                                            name="padding_masks")

        last_layer = self.input_op
        last_padding_masks = self.padding_masks

        self.image_processing_layers = []  # type: List[tf.Tensor]

        with tf.variable_scope("convolutions"):
            for i, (filter_size,
                    n_filters,
                    pool_size) in enumerate(convolutions):
                with tf.variable_scope("cnn_layer_{}".format(i)):
                    last_layer = conv2d(last_layer, n_filters, filter_size)
                    self.image_processing_layers.append(last_layer)

                    if pool_size:
                        last_layer = max_pool2d(last_layer, pool_size)
                        self.image_processing_layers.append(last_layer)
                        last_padding_masks = max_pool2d(
                            last_padding_masks, pool_size)

                    if local_response_normalization:
                        last_layer = tf.nn.local_response_normalization(
                            last_layer)

                    if batch_normalization:
                        last_layer = batch_norm(
                            last_layer, is_training=self.train_mode)

                    last_layer = dropout(last_layer, dropout_keep_prob,
                                         self.train_mode)

            # last_layer shape is batch X height X width X channels
            last_layer = last_layer * last_padding_masks

        # pylint: disable=no-member
        last_height, last_width, last_n_channels = [
            s.value for s in last_layer.get_shape()[1:]]
        # pylint: enable=no-member

        if fully_connected is None:
            # we average out by the image size -> shape is number
            # channels from the last convolution
            self.encoded = tf.reduce_mean(last_layer, [1, 2])
            assert_shape(self.encoded, [None, convolutions[-1][1]])
        else:
            last_layer_flat = tf.reshape(
                last_layer,
                [-1, last_width * last_height * last_n_channels])
            self.encoded = multilayer_projection(
                last_layer_flat, fully_connected,
                activation=tf.nn.relu,
                dropout_plc=self.dropout_placeholder)

        self.__attention_tensor = tf.reshape(
            last_layer, [-1, last_width * last_height, last_n_channels])

        self.__attention_mask = tf.reshape(
            last_padding_masks, [-1, last_width * last_height])
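# Hedged example of a "convolutions" configuration as described in the
# docstring above: (filter size, number of filters, max-pooling size) triplets.
# The concrete values are illustrative only.
convolutions = [
    (3, 64, 2),      # 3x3 filters, 64 maps, 2x2 max-pooling
    (3, 128, 2),     # 3x3 filters, 128 maps, 2x2 max-pooling
    (3, 256, None),  # 3x3 filters, 256 maps, no pooling
]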
def _create_encoder_graph(self):
    self.dropout_placeholder = tf.placeholder(tf.float32, name="dropout")
    self.is_training = tf.placeholder(tf.bool, name="is_training")

    self.padding_weights = [
        tf.placeholder(tf.float32, shape=[None], name="input_{}".format(i))
        for i in range(self.max_input_len + 2)]

    sentence_lengths = tf.to_int64(sum(self.padding_weights))

    self.factor_inputs = {}
    factors = []

    for data_id, vocabulary, embedding_size in zip(
            self.data_ids, self.vocabularies, self.embedding_sizes):
        # Create data placeholders. The tensors' length is max_input_len+2
        # because we add explicit start and end symbols.
        prefix = ""
        if len(self.data_ids) > 1:
            prefix = "{}_".format(data_id)

        names = ["{}input_{}".format(prefix, i)
                 for i in range(self.max_input_len + 2)]

        inputs = [tf.placeholder(tf.int32, shape=[None], name=n)
                  for n in names]

        # Create embeddings for this factor and embed the placeholders
        # NOTE the initialization
        embeddings = tf.get_variable(
            "word_embeddings",
            shape=[len(vocabulary), embedding_size],
            initializer=tf.random_normal_initializer(stddev=0.01))

        embedded_inputs = [tf.nn.embedding_lookup(embeddings, i)
                           for i in inputs]

        dropped_embedded_inputs = [
            tf.nn.dropout(i, self.dropout_placeholder)
            for i in embedded_inputs]

        # Resulting shape is batch x embedding_size
        assert_shape(dropped_embedded_inputs, [None, embedding_size])
        factors.append(dropped_embedded_inputs)

        # Add inputs and weights to self to be able to feed them
        self.factor_inputs[data_id] = inputs

    # Concatenate all embedded factors into one tensor
    # Resulting shape is batch x sum(embedding_size)
    # factors is a 2D list of embeddings of dims [factor-type, time-step]
    # by doing zip(*factors), we get a list of (factor-type) embedding
    # tuples indexed by the time step
    concatenated_factors = [
        tf.concat(1, related_factors)
        for related_factors in zip(*factors)]
    assert_shape(concatenated_factors[0],
                 [None, sum(self.embedding_sizes)])

    forward_gru, backward_gru = self._get_birnn_cells()

    bidi_layer = BidirectionalRNNLayer(forward_gru, backward_gru,
                                       concatenated_factors,
                                       sentence_lengths)

    self.outputs_bidi = bidi_layer.outputs_bidi
    self.encoded = bidi_layer.encoded

    self.__attention_tensor = tf.concat(
        1, [tf.expand_dims(o, 1) for o in self.outputs_bidi])
    self.__attention_tensor = tf.nn.dropout(self.__attention_tensor,
                                            self.dropout_placeholder)

    self.__attention_mask = tf.concat(
        1, [tf.expand_dims(w, 1) for w in self.padding_weights])
def _encoders_tensors(self) -> List[tf.Tensor]:
    tensors = [get_attention_states(e) for e in self._encoders]
    for e_t in tensors:
        assert_shape(e_t, [-1, -1, -1])
    return tensors
def __init__(self, cell, mask, scale):
    self._cell = cell
    self._mask = mask
    assert_shape(mask, [None, cell.state_size])
    self._scale = scale
def __init__(self,
             name: str,
             data_id: str,
             convolutions: List[Tuple[int, int, Optional[int]]],
             image_height: int,
             image_width: int,
             pixel_dim: int,
             batch_normalization: bool = True,
             local_response_normalization: bool = True,
             dropout_keep_prob: float = 0.5,
             attention_type: Type = Attention,
             save_checkpoint: Optional[str] = None,
             load_checkpoint: Optional[str] = None) -> None:
    """Initialize a convolutional network for image processing.

    Args:
        convolutions (list): Configuration of convolutional layers. It is
            a list of triplets of integers where the values are: size of
            the convolutional window, number of convolutional filters, and
            size of max-pooling window. If the max-pooling size is set to
            None, no pooling is performed.
        data_id: Identifier of the data series in the dataset.
        image_height: Height of the input image in pixels.
        image_width: Width of the images (padded).
        pixel_dim: Number of color channels in the input images.
        batch_normalization: Flag whether the batch normalization should
            be used between the convolutional layers.
        local_response_normalization: Flag whether to use local response
            normalization between the convolutional layers.
        dropout_keep_prob: Probability of keeping neurons active in
            dropout.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    Attentive.__init__(self, attention_type)

    self.convolutions = convolutions
    self.data_id = data_id
    self.image_height = image_height
    self.image_width = image_width
    self.pixel_dim = pixel_dim
    self.dropout_keep_prob = dropout_keep_prob

    with tf.variable_scope(name):
        self.dropout_placeholder = tf.placeholder(tf.float32,
                                                  name="dropout")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.input_op = tf.placeholder(tf.float32,
                                       shape=(None, image_height,
                                              image_width, pixel_dim),
                                       name="input_images")

        self.padding_masks = tf.placeholder(tf.float32,
                                            shape=(None, image_height,
                                                   image_width, 1),
                                            name="padding_masks")

        last_layer = self.input_op
        last_padding_masks = self.padding_masks
        last_n_channels = pixel_dim

        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.image_processing_layers = []  # type: List[tf.Tensor]

        with tf.variable_scope("convolutions"):
            for i, (filter_size,
                    n_filters,
                    pool_size) in enumerate(convolutions):
                with tf.variable_scope("cnn_layer_{}".format(i)):
                    last_layer = _convolution(
                        last_layer, last_n_channels, filter_size, n_filters)
                    last_n_channels = n_filters
                    self.image_processing_layers.append(last_layer)

                    if pool_size:
                        # TODO do the pooling properly
                        last_layer = tf.nn.max_pool(
                            last_layer, [1, 2, 2, 1], [1, 2, 2, 1], "SAME")
                        last_padding_masks = tf.nn.max_pool(
                            last_padding_masks, [1, 2, 2, 1],
                            [1, 2, 2, 1], "SAME")
                        self.image_processing_layers.append(last_layer)
                        assert image_height % 2 == 0
                        image_height //= 2
                        assert image_width % 2 == 0
                        image_width //= 2

                    if local_response_normalization:
                        last_layer = tf.nn.local_response_normalization(
                            last_layer)

                    if batch_normalization:
                        last_layer = _batch_norm(last_layer, n_filters,
                                                 self.is_training)

                    last_layer = tf.nn.dropout(
                        last_layer, keep_prob=self.dropout_placeholder)

            # last_layer shape is batch X height X width X channels
            last_layer = last_layer * last_padding_masks

        # we average out by the image size -> shape is number
        # channels from the last convolution
        self.encoded = tf.reduce_mean(last_layer, [1, 2])
        # TODO assert shape
        assert_shape(self.encoded, [None, self.convolutions[-1][1]])

        self.__attention_tensor = tf.reshape(
            last_layer, [-1, image_width, last_n_channels * image_height])

        self.__attention_mask = tf.squeeze(
            tf.reduce_prod(last_padding_masks, [1]), [2])