    def highway_layer(self) -> tf.Tensor:
        """Highway net projection following the CNN."""
        # pylint: disable=no-member
        cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
        highway_layer = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
        for i in range(self.highway_depth):
            highway_layer = highway(
                highway_layer, scope=("highway_layer_%s" % i))
        return tf.reshape(
            highway_layer, [self.batch_size, -1, cnn_out_size])
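    # Background note (not from the original file): highway() is assumed
    # here to implement the standard highway transform of Srivastava et
    # al. (2015), y = t * H(x) + (1 - t) * x, where the transform gate
    # t = sigmoid(W_T x + b_T) interpolates between a nonlinear projection
    # H(x) and the identity. The transform preserves the input
    # dimensionality, which is why cnn_out_size can be reused for the
    # final reshape back to (batch, time, channels).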
    def __init__(self,
                 name: str,
                 vocabulary: Vocabulary,
                 data_id: str,
                 embedding_size: int,
                 segment_size: int,
                 highway_depth: int,
                 rnn_size: int,
                 filters: List[Tuple[int, int]],
                 max_input_len: Optional[int] = None,
                 dropout_keep_prob: float = 1.0,
                 attention_type: Optional[Any] = None,
                 attention_fertility: int = 3,
                 use_noisy_activations: bool = False,
                 save_checkpoint: Optional[str] = None,
                 load_checkpoint: Optional[str] = None) -> None:
        """Create a new instance of the sentence encoder.

        Arguments:
            vocabulary: Input vocabulary
            data_id: Identifier of the data series fed to this encoder
            name: A unique identifier for this encoder
            max_input_len: Maximum length of an encoded sequence
            embedding_size: The size of the embedding vector assigned
                to each word
            segment_size: The size of the segments over which we apply
                max-pooling.
            highway_depth: Depth of the highway layer.
            rnn_size: The size of the encoder's hidden state. Note that
                the actual encoder output state size will be twice as
                long because it is the result of concatenation of the
                forward and backward hidden states.
            filters: Specification of the CNN filters. It is a list of
                tuples specifying the filter size and number of channels.

        Keyword arguments:
            dropout_keep_prob: The dropout keep probability (default 1.0)
            attention_type: The class that is used for creating the
                attention mechanism (default None)
            attention_fertility: Fertility parameter used with
                CoverageAttention (default 3).
            use_noisy_activations: Whether the RNN cells should use noisy
                activation functions (default False).
        """
        ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
        Attentive.__init__(
            self, attention_type, attention_fertility=attention_fertility)
        assert check_argument_types()

        self.vocabulary = vocabulary
        self.data_id = data_id
        self.max_input_len = max_input_len
        self.embedding_size = embedding_size
        self.segment_size = segment_size
        self.highway_depth = highway_depth
        self.rnn_size = rnn_size
        self.filters = filters
        self.dropout_keep_p = dropout_keep_prob
        self.use_noisy_activations = use_noisy_activations

        if max_input_len is not None and max_input_len <= 0:
            raise ValueError("Input length must be a positive integer.")

        log("Initializing sentence encoder, name: '{}'".format(self.name))

        with self.use_scope():
            self._create_input_placeholders()

            with tf.variable_scope('input_projection'):
                self._create_embedding_matrix()
                embedded_inputs = self._embed(self.inputs)  # type: tf.Tensor
                self.embedded_inputs = embedded_inputs

            # CNN network: one convolution + max-pool branch per filter spec
            pooled_outputs = []
            for filter_size, num_filters in self.filters:
                with tf.variable_scope("conv-maxpool-%s" % filter_size):
                    filter_shape = [filter_size, embedding_size, num_filters]
                    w_filter = tf.get_variable(
                        "conv_W", filter_shape,
                        initializer=tf.random_uniform_initializer(-0.5, 0.5))
                    b_filter = tf.get_variable(
                        "conv_bias", [num_filters],
                        initializer=tf.constant_initializer(0.0))
                    conv = tf.nn.conv1d(
                        embedded_inputs, w_filter, stride=1, padding="SAME",
                        name="conv")

                    # Apply nonlinearity
                    conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                    # Max-pooling over the output segments
                    expanded_conv_relu = tf.expand_dims(conv_relu, -1)
                    pooled = tf.nn.max_pool(
                        expanded_conv_relu,
                        ksize=[1, self.segment_size, 1, 1],
                        strides=[1, self.segment_size, 1, 1],
                        padding="SAME",
                        name="maxpool")
                    pooled_outputs.append(pooled)

            # Combine all the pooled features
            self.cnn_encoded = tf.concat(pooled_outputs, axis=2)
            self.cnn_encoded = tf.squeeze(self.cnn_encoded, [3])

            # Highway network
            batch_size = tf.shape(self.cnn_encoded)[0]
            # pylint: disable=no-member
            cnn_out_size = self.cnn_encoded.get_shape().as_list()[-1]
            highway_layer = tf.reshape(self.cnn_encoded, [-1, cnn_out_size])
            for i in range(self.highway_depth):
                highway_layer = highway(
                    highway_layer, scope=("highway_layer_%s" % i))
            highway_layer = tf.reshape(
                highway_layer, [batch_size, -1, cnn_out_size])

            # BiRNN network
            fw_cell, bw_cell = self.rnn_cells()  # type: RNNCellTuple

            # Max-pooling shortened the time dimension by a factor of
            # segment_size, so the RNN sequence lengths shrink accordingly.
            seq_lens = tf.ceil(
                tf.divide(self.sentence_lengths, self.segment_size))
            seq_lens = tf.cast(seq_lens, tf.int32)

            outputs_bidi_tup, encoded_tup = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, highway_layer,
                sequence_length=seq_lens, dtype=tf.float32)

            self.hidden_states = tf.concat(outputs_bidi_tup, 2)

            with tf.variable_scope('attention_tensor'):
                self.__attention_tensor = self._dropout(self.hidden_states)

            self.encoded = tf.concat(encoded_tup, 1)

        log("Sentence encoder initialized")
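    # A minimal usage sketch, not part of the original file. The class
    # name SentenceCNNEncoder and the `vocab` object are illustrative
    # assumptions; the hyperparameter values are arbitrary examples
    # following the conventions documented in __init__ above.
    #
    #     encoder = SentenceCNNEncoder(
    #         name="sentence_cnn_encoder",
    #         vocabulary=vocab,  # an already-built Vocabulary
    #         data_id="source",
    #         embedding_size=300,
    #         segment_size=3,    # max-pool over windows of 3 positions
    #         highway_depth=2,
    #         rnn_size=256,      # encoder output will be 2 * 256 wide
    #         filters=[(3, 64), (4, 64), (5, 64)],  # (size, channels)
    #         dropout_keep_prob=0.8)
    #
    # After construction, encoder.encoded holds the concatenation of the
    # forward and backward final RNN states (see the end of __init__).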