Example #1
    def add_prediction_op(self):
        print "***Building network with ReLU activation***"
        x = self.add_embedding()

        with tf.variable_scope("layer_connections"):
            with tf.variable_scope("layer_1"):
                w1 = xavier_initializer((self.config.num_features_types * self.config.embedding_dim,
                                         self.config.hidden_size), "w1")
                b1 = xavier_initializer((self.config.hidden_size,), "bias1")

                # for visualization
                preactivations = tf.add(tf.matmul(x, w1), b1, name="preactivations")
                tf.summary.histogram("preactivations", preactivations)

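                # fraction of units whose preactivation is non-positive, i.e.
                # units the ReLU zeroes out on this batch (a dead-ReLU proxy)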
                non_positive_activation_fraction = tf.reduce_mean(tf.cast(tf.less_equal(preactivations, 0),
                                                                          tf.float32))
                tf.summary.scalar("non_negative_activations_fraction", non_positive_activation_fraction)

                h1 = tf.nn.dropout(tf.nn.relu(preactivations),
                                   keep_prob=self.dropout_placeholder,
                                   name="output_activations")

            with tf.variable_scope("layer_2"):
                w2 = xavier_initializer((self.config.hidden_size, self.config.num_classes), "w2")
                b2 = xavier_initializer((self.config.num_classes,), "bias2")
        with tf.variable_scope("predictions"):
            predictions = tf.add(tf.matmul(h1, w2), b2, name="prediction_logits")

        return predictions
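
Both examples call a project-local xavier_initializer(shape, name) helper that is not shown on this page. A minimal sketch of what it might look like, assuming TF 1.x (the name and signature come from the calls above; the body is an assumption):

import tensorflow as tf

def xavier_initializer(shape, name):
    # Assumed implementation: a trainable variable with
    # Xavier/Glorot uniform initialization.
    return tf.get_variable(
        name,
        shape=shape,
        initializer=tf.contrib.layers.xavier_initializer())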

Example #2
    def add_prediction_op(self):
        print("***Building network with ReLU activation***")
        word_context_embeddings, word_context_embeddings_expanded, \
        char_context_embeddings, char_context_embeddings_expanded = self.add_embedding()
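        # shapes assumed from the usage below:
        #   word_context_embeddings            [B, T, word_emb_dim]
        #   char_context_embeddings_expanded   [B, T, max_word_len, char_emb_dim, 1]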

        # Step 1: CNN over characters
        pooled_char_outputs = []
        for i, char_filter_size in enumerate(self.config.char_filter_sizes):
            with tf.variable_scope("char-conv-maxpool-%s" % char_filter_size):
                # Convolution Layer
                filter_shape = [
                    1, char_filter_size, self.config.char_embedding_dim, 1,
                    self.config.char_num_filters
                ]  # [H, W, in_c, out_c]

                # try Xavier initialization as well:
                # filter = random_truncated_normal_initializer(filter_shape, "filter", stddev=0.1)
                filter = tf.Variable(tf.truncated_normal(filter_shape,
                                                         stddev=0.1),
                                     name="W")
                b = tf.Variable(tf.constant(
                    0.1, shape=[self.config.char_num_filters]),
                                name="conv_bias")

                conv = tf.nn.conv3d(char_context_embeddings_expanded,
                                    filter,
                                    strides=self.config.char_stride,
                                    padding="VALID",
                                    name="conv")
                print "conv shape:", conv.get_shape().as_list()
                h = tf.nn.tanh(tf.nn.bias_add(conv, b),
                               name="relu")  # [B, new_H, new_W, out_c]

                # h_batch_norm = tf.contrib.layers.batch_norm(h,
                #                                   center=True, scale=True,
                #                                   is_training=self.is_training,
                #                                   scope='bn')
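                # tf.nn.max_pool expects a 4-D tensor, so fold the batch and
                # time axes together before pooling, then restore them after.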
                h_shape = h.get_shape().as_list()
                h_4d = tf.reshape(h, [-1, h_shape[2], h_shape[3], h_shape[4]],
                                  "char_4d_h")

                pooled = tf.nn.max_pool(
                    h_4d,
                    # ksize[2] = 1 because the VALID conv already collapsed
                    # the char-embedding axis to width 1; pool over the full
                    # remaining height
                    ksize=[1, h_4d.get_shape().as_list()[1], 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_shape = pooled.get_shape().as_list()
                pooled_5d = tf.reshape(pooled, [
                    -1, h_shape[1], pooled_shape[1], pooled_shape[2],
                    pooled_shape[3]
                ])

                pooled_char_outputs.append(pooled_5d)

        char_num_filters_total = self.config.char_num_filters * len(
            self.config.char_filter_sizes)
        self.h_pool_char = tf.concat(
            pooled_char_outputs,
            4)  # collect across all output channels [B, T, o_h, o_w, o_c]
        self.h_pool_char_flat = tf.reshape(
            self.h_pool_char,
            [-1, self.config.max_seq_len, char_num_filters_total
             ])  # [B, T, num_features]

        # Step 2: Highway layer(s) over the char-CNN
        if self.config.use_highway_layer:
            print("***Adding Highway Layer on top of char CNN***")
            curr_input = self.h_pool_char_flat
            for i in range(self.config.num_highway_layers):
                curr_input_2d = tf.reshape(curr_input,
                                           [-1, char_num_filters_total])
                # Highway Layer
                with tf.variable_scope("highway_layer_" + str(i + 1)):
                    # use a negative transform-gate bias (here -2) so the gate
                    # starts mostly closed early in training (ref: the Highway
                    # Networks paper and follow-up blog posts)
                    with tf.variable_scope("transform_gate"):
                        W_T = xavier_initializer(
                            (char_num_filters_total, char_num_filters_total),
                            "W_T")
                        # b_T = xavier_initializer((char_num_filters_total,), "bias_T")
                        b_T = tf.Variable(tf.constant(
                            -2., shape=[
                                char_num_filters_total,
                            ]),
                                          name="bias_T")
                        activations_T = tf.nn.sigmoid(
                            tf.nn.xw_plus_b(curr_input_2d,
                                            W_T,
                                            b_T,
                                            name="transform_activations"))
                        print("transformed activations shape: {}".format(
                            activations_T.get_shape().as_list()))

                    with tf.variable_scope("output_gate"):
                        W = xavier_initializer(
                            (char_num_filters_total, char_num_filters_total),
                            "W")
                        b = xavier_initializer((char_num_filters_total, ),
                                               "bias")
                        activations_output = tf.nn.relu(
                            tf.nn.xw_plus_b(curr_input_2d,
                                            W,
                                            b,
                                            name="out_activations"))
                        print("output activations shape: {}".format(
                            activations_output.get_shape().as_list()))

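                    # Highway combination: y = T(x) * H(x) + (1 - T(x)) * x,
                    # with sigmoid transform gate T and ReLU output gate H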
                    activations_carry = 1. - activations_T
                    highway_output = activations_T * activations_output + activations_carry * curr_input_2d
                    curr_input = highway_output
                    self.highway_output = tf.reshape(
                        highway_output,
                        [-1, self.config.max_seq_len, char_num_filters_total],
                        name="highway_output")

        if self.config.use_highway_layer:
            char_word_context_embeddings = self.highway_output
        else:
            char_word_context_embeddings = self.h_pool_char_flat

        # Step 3: concatenate word-level and char-level features
        self.context_embeddings = tf.expand_dims(
            tf.concat([word_context_embeddings, char_word_context_embeddings],
                      2), -1)
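        # shape: [B, T, word_emb_dim + char_num_filters_total, 1]; the
        # trailing channel axis is what tf.nn.conv2d expects as input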
        feature_vec_len = self.context_embeddings.get_shape().as_list()[2]

        # Step 4: CNN over words
        pooled_word_outputs = []

        for i, word_filter_size in enumerate(self.config.word_filter_sizes):
            with tf.variable_scope("word-conv-maxpool-%s" % word_filter_size):
                # Convolution Layer
                filter_shape = [
                    word_filter_size, feature_vec_len, 1,
                    self.config.word_num_filters
                ]  # [H, W, in_c, out_c]

                # try Xavier initialization as well:
                # filter = random_truncated_normal_initializer(filter_shape, "filter", stddev=0.1)
                filter = tf.Variable(tf.truncated_normal(filter_shape,
                                                         stddev=0.1),
                                     name="W")
                b = tf.Variable(tf.constant(
                    0.1, shape=[self.config.word_num_filters]),
                                name="conv_bias")

                conv = tf.nn.conv2d(self.context_embeddings,
                                    filter,
                                    strides=self.config.word_stride,
                                    padding="VALID",
                                    name="conv")
                print "conv shape:", conv.get_shape().as_list()
                h = tf.nn.relu(tf.nn.bias_add(conv, b),
                               name="relu")  # [B, new_H, new_W, out_c]

                # h_batch_norm = tf.contrib.layers.batch_norm(h,
                #                                   center=True, scale=True,
                #                                   is_training=self.is_training,
                #                                   scope='bn')

                pooled = tf.nn.max_pool(
                    h,
                    # ksize[2] = 1 because the VALID conv already spans the
                    # full feature width; pool over the entire output height
                    ksize=[1, h.get_shape().as_list()[1], 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_word_outputs.append(pooled)

        # Combine all the pooled features
        word_num_filters_total = self.config.word_num_filters * len(
            self.config.word_filter_sizes)
        self.h_pool_word = tf.concat(
            pooled_word_outputs,
            3)  # concat across output channels: [B, out_H, out_W, out_c_total]
        self.h_pool_word_2d = tf.reshape(
            self.h_pool_word,
            [-1, word_num_filters_total])  # [B, num_features]

        activations = self.h_pool_word_2d
        feature_vec_len = word_num_filters_total
        # Step 5: FC layer + dropout
        if self.config.use_fc_layer:
            # Final (unnormalized) scores and predictions
            with tf.variable_scope("fc_layer"):
                W = xavier_initializer(
                    (feature_vec_len, self.config.fc_layer_dim), "W")
                b = xavier_initializer((self.config.fc_layer_dim, ), "bias")
                activations = tf.nn.dropout(
                    tf.nn.relu(
                        tf.nn.xw_plus_b(self.h_pool_word_2d,
                                        W,
                                        b,
                                        name="fc_preactivations")),
                    keep_prob=self.dropout_placeholder_fc)
            feature_vec_len = activations.get_shape().as_list()[1]

        # Step 6: output layer (unnormalized logits)
        with tf.variable_scope("output_layer"):
            W1 = xavier_initializer((feature_vec_len, self.config.num_classes),
                                    "W")
            b1 = xavier_initializer((self.config.num_classes, ), "bias")
            predictions = tf.nn.xw_plus_b(tf.nn.dropout(
                activations, keep_prob=self.dropout_placeholder_word),
                                          W1,
                                          b1,
                                          name="prediction_logits")

        return predictions
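
Example #2 pulls every hyperparameter off self.config. A hypothetical Config collecting the attributes referenced above (attribute names are taken from the code; all values are illustrative assumptions, not from the source):

class Config(object):
    # character-level CNN (placeholder values)
    char_embedding_dim = 25
    char_filter_sizes = [2, 3, 4]
    char_num_filters = 32
    char_stride = [1, 1, 1, 1, 1]   # conv3d strides: [batch, T, width, emb, channel]
    max_seq_len = 50

    # highway layer(s)
    use_highway_layer = True
    num_highway_layers = 1

    # word-level CNN
    word_filter_sizes = [3, 4, 5]
    word_num_filters = 128
    word_stride = [1, 1, 1, 1]      # conv2d strides

    # classifier head
    use_fc_layer = True
    fc_layer_dim = 256
    num_classes = 2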