    # Example 1: DPCNN-style text classifier (TF 1.x; `import tensorflow as tf`
    # and the enclosing model class are assumed).
    def __init__(
            self, sequence_length, num_classes, vocab_size, embedding_size,
            filter_sizes, num_filters, num_blocks, l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.filter_size = 3  # DPCNN uses a fixed kernel width; `filter_sizes` is unused here
        self.num_filters = num_filters
        self.use_region_emb = True
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            if self.use_region_emb:
                self.region_size = 5
                self.region_radius = self.region_size // 2  # integer division, so slicing indices stay ints
                self.k_matrix_embedding = tf.Variable(tf.random_uniform([vocab_size, self.region_size, embedding_size], -1.0, 1.0), name="k_matrix")
                self.embedded_chars = self.region_embedding(self.input_x)
                # Region embedding trims the sequence ends, so refresh the length.
                sequence_length = int(self.embedded_chars.shape[1])
            else:
                self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
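            # Add a channel axis so the 2-D conv ops below can treat the
            # embedded sequence as a [batch, length, embedding, 1] "image".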
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        # 2. Two stacked convolutional layers
        conv = self.dpcnn_two_layers_conv(self.embedded_chars_expanded)
        # 2.1 Skip connection: bias and activation, then add the embedded
        #     input back (the add broadcasts across the channel axis)
        b = tf.get_variable("b-inference", [self.num_filters])
        conv = tf.nn.relu(tf.nn.bias_add(conv, b), "relu-inference")
        conv = conv + self.embedded_chars_expanded
        # 3. Repeat the downsampling + convolution building block
        for i in range(num_blocks):
            conv = self.dpcnn_pooling_two_conv(conv, i)
        # 4. Global max pooling over the remaining positions
        seq_length1 = conv.get_shape().as_list()[1]
        seq_length2 = conv.get_shape().as_list()[2]
        pooling = tf.nn.max_pool(conv, ksize=[1, seq_length1, seq_length2, 1],
                                 strides=[1, 1, 1, 1], padding='VALID',
                                 name="pool")
        fc_hidden_size = pooling.get_shape().as_list()[-1]
        # Squeeze only the spatial axes so a batch of size 1 is not collapsed.
        self.h_pool_flat = tf.squeeze(pooling, axis=[1, 2])
        # Fully Connected Layer
        with tf.name_scope("fc"):
            
            W_fc = tf.Variable(tf.truncated_normal(shape=[fc_hidden_size, fc_hidden_size],\
                stddev=0.1, dtype=tf.float32), name="W_fc")
            self.fc = tf.matmul(self.h_pool_flat, W_fc)
            self.fc_bn = tf.layers.batch_normalization(self.fc, training=self.is_training)
            self.fc_out = tf.nn.relu(self.fc_bn, name="relu")
        # Highway Layer
        self.highway = highway(self.fc_out, self.fc_out.get_shape()[1], num_layers=1, bias=-0.5, scope="Highway")

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[fc_hidden_size, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
            self.correct_pred_num = tf.reduce_sum(tf.cast(correct_predictions, tf.int32), name="correct_pred_num")
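
    # The helper methods below are not part of the original snippet: they are
    # minimal sketches of what `region_embedding`, `dpcnn_two_layers_conv`,
    # and `dpcnn_pooling_two_conv` plausibly do, reconstructed from how they
    # are called above. The real implementations may differ.

    def region_embedding(self, input_x):
        # Word-context region embedding (a sketch): each word embedding in a
        # window is gated elementwise by the center word's context matrix,
        # then max-pooled over the window.
        radius = self.region_radius
        seq_len = int(input_x.shape[1])
        # Static indices of the window around each interior position.
        window = tf.constant([[i + j for j in range(self.region_size)]
                              for i in range(seq_len - 2 * radius)])
        region_words = tf.gather(input_x, window, axis=1)           # [B, L', R]
        region_emb = tf.nn.embedding_lookup(self.W, region_words)   # [B, L', R, E]
        centers = input_x[:, radius:seq_len - radius]               # [B, L']
        context_units = tf.nn.embedding_lookup(
            self.k_matrix_embedding, centers)                       # [B, L', R, E]
        return tf.reduce_max(region_emb * context_units, axis=2)    # [B, L', E]

    def dpcnn_two_layers_conv(self, inputs):
        # Two stacked width-`self.filter_size` pre-activation convolutions
        # with SAME padding, so the sequence length is preserved (a sketch).
        conv = inputs
        in_channels = int(inputs.get_shape()[-1])
        for i in range(2):
            with tf.variable_scope("two-conv-%d" % i, reuse=tf.AUTO_REUSE):
                filter_shape = [self.filter_size, 1,
                                in_channels if i == 0 else self.num_filters,
                                self.num_filters]
                W = tf.get_variable(
                    "W", filter_shape,
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                conv = tf.nn.conv2d(tf.nn.relu(conv), W,
                                    strides=[1, 1, 1, 1], padding="SAME")
        return conv

    def dpcnn_pooling_two_conv(self, conv, block_index):
        # One repeated DPCNN block (a sketch): stride-2 max pooling halves
        # the sequence, then two convolutions with a residual connection.
        with tf.variable_scope("pooling-conv-%d" % block_index):
            pooled = tf.nn.max_pool(conv, ksize=[1, 3, 1, 1],
                                    strides=[1, 2, 1, 1], padding="SAME",
                                    name="pool")
            return self.dpcnn_two_layers_conv(pooled) + pooled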
    # Example 2: TextCNN-style classifier with one convolution + max-pooling
    # branch per filter size.
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.use_region_emb = True
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 name="W")
            if self.use_region_emb:
                self.region_size = 5
                self.region_radius = self.region_size // 2  # integer division
                self.k_matrix_embedding = tf.Variable(tf.random_uniform(
                    [vocab_size, self.region_size, embedding_size], -1.0, 1.0),
                                                      name="k_matrix")
                self.embedded_chars = self.region_embedding(self.input_x)
                sequence_length = int(self.embedded_chars.shape[1])
            else:
                self.embedded_chars = tf.nn.embedding_lookup(
                    self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                conv = tf.nn.conv2d(self.embedded_chars_expanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                conv_bn = tf.layers.batch_normalization(
                    conv, training=self.is_training)
                # Apply nonlinearity
                h = tf.nn.relu(conv_bn, name="relu")
                # Maxpooling over the outputs
                pool_size = sequence_length - filter_size + 1
                pooled = self._max_pooling(h, pool_size)
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Fully Connected Layer
        with tf.name_scope("fc"):
            fc_hidden_size = num_filters_total
            W_fc = tf.Variable(tf.truncated_normal(
                shape=[num_filters_total, fc_hidden_size],
                stddev=0.1, dtype=tf.float32), name="W_fc")
            self.fc = tf.matmul(self.h_pool_flat, W_fc)
            self.fc_bn = tf.layers.batch_normalization(
                self.fc, training=self.is_training)
            self.fc_out = tf.nn.relu(self.fc_bn, name="relu")
        # Highway Layer
        self.highway = highway(self.fc_out,
                               self.fc_out.get_shape()[1],
                               num_layers=1,
                               bias=-0.5,
                               scope="Highway")

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W_out = tf.Variable(tf.truncated_normal(
                shape=[fc_hidden_size, num_classes],
                stddev=0.1, dtype=tf.float32), name="W_out")
            b_out = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                                name="b_out")
            l2_loss += tf.nn.l2_loss(W_out)
            l2_loss += tf.nn.l2_loss(b_out)
            self.scores = tf.nn.xw_plus_b(self.h_drop,
                                          W_out,
                                          b_out,
                                          name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
            self.correct_pred_num = tf.reduce_sum(tf.cast(
                correct_predictions, tf.int32),
                                                  name="correct_pred_num")
    # Example 3: DCNN-style classifier with folding and dynamic k-max pooling.
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 filter_sizes=[7, 5],
                 num_filters=[8, 14],
                 top_k=6,
                 k1=12,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.use_region_emb = False
        self.fc_hidden_size = 2048
        self.use_dialate_conv = False  # toggles the dilated-convolution path below
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 name="W")
            if self.use_region_emb:
                self.region_size = 5
                self.region_radius = self.region_size // 2  # integer division
                self.k_matrix_embedding = tf.Variable(tf.random_uniform(
                    [vocab_size, self.region_size, embedding_size], -1.0, 1.0),
                                                      name="k_matrix")
                self.embedded_chars = self.region_embedding(self.input_x)
                sequence_length = int(self.embedded_chars.shape[1])
            else:
                self.embedded_chars = tf.nn.embedding_lookup(
                    self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # Create a dcnn + dynamic k max pooling layer
        with tf.name_scope("conv_pooling_layer"):
            if self.use_dialate_conv:
                # First layer: dilated conv + BN + folding/k-max pooling
                W1 = tf.Variable(tf.truncated_normal(
                    [filter_sizes[0], 2, 1, num_filters[0]], stddev=0.1),
                                 name="W1")
                b1 = tf.Variable(tf.constant(0.1, shape=[num_filters[0]]),
                                 name="b1")
                conv1 = self.dialate_conv_layer(self.embedded_chars_expanded,
                                                W1,
                                                b1,
                                                rate=2,
                                                scope="dialate_conv_1")
                conv_bn1 = tf.layers.batch_normalization(
                    conv1, training=self.is_training)
                pooled1 = self.folding_k_max_pooling(conv_bn1, k1)

                # Second layer: dilated conv + BN + folding/k-max pooling
                W2 = tf.Variable(tf.truncated_normal(
                    [filter_sizes[1], 3, num_filters[0], num_filters[1]],
                    stddev=0.1),
                                 name="W2")
                b2 = tf.Variable(tf.constant(0.1, shape=[num_filters[1]]),
                                 name="b2")
                conv2 = self.dialate_conv_layer(pooled1,
                                                W2,
                                                b2,
                                                rate=2,
                                                scope="dialate_conv_2")
                conv_bn2 = tf.layers.batch_normalization(
                    conv2, training=self.is_training)
                pooled2 = self.folding_k_max_pooling(conv_bn2, top_k)
            else:
                W1 = tf.Variable(tf.truncated_normal(
                    [filter_sizes[0], embedding_size, 1, num_filters[0]],
                    stddev=0.1),
                                 name="W1")
                b1 = tf.Variable(tf.constant(
                    0.1, shape=[num_filters[0], embedding_size]),
                                 name="b1")
                conv1 = self.conv1d_layer(self.embedded_chars_expanded,
                                          W1,
                                          b1,
                                          scope="conv1d_1")
                conv_bn1 = tf.layers.batch_normalization(
                    conv1, training=self.is_training)
                pooled1 = self.folding_k_max_pooling(conv_bn1, k1)

                W2 = tf.Variable(tf.truncated_normal([
                    filter_sizes[1], embedding_size, num_filters[0],
                    num_filters[1]
                ],
                                                     stddev=0.1),
                                 name="W2")
                b2 = tf.Variable(tf.constant(
                    0.1, shape=[num_filters[1], embedding_size]),
                                 name="b2")
                conv2 = self.conv1d_layer(pooled1, W2, b2, scope="conv1d_2")
                conv_bn2 = tf.layers.batch_normalization(
                    conv2, training=self.is_training)
                pooled2 = self.folding_k_max_pooling(conv_bn2, top_k)
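
        # pooled2 has static shape [batch, top_k, embedding_size/4,
        # num_filters[1]] after two rounds of folding, so the flattened size
        # below is known at graph-construction time.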

        # Combine all the pooled features
        num_filters_total = int(pooled2.get_shape()[1] *
                                pooled2.get_shape()[2] *
                                pooled2.get_shape()[3])
        self.h_pool_flat = tf.reshape(pooled2, [-1, num_filters_total])

        # Fully Connected Layer
        with tf.name_scope("fc"):
            W_fc = tf.Variable(tf.truncated_normal(
                shape=[num_filters_total, self.fc_hidden_size],
                stddev=0.1, dtype=tf.float32), name="W_fc")
            self.fc = tf.matmul(self.h_pool_flat, W_fc)
            self.fc_bn = tf.layers.batch_normalization(
                self.fc, training=self.is_training)
            self.fc_out = tf.nn.relu(self.fc_bn, name="relu")
        # Highway Layer
        self.highway = highway(self.fc_out,
                               self.fc_out.get_shape()[1],
                               num_layers=1,
                               bias=-0.5,
                               scope="Highway")

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W_out = tf.Variable(tf.truncated_normal(
                shape=[self.fc_hidden_size, num_classes],
                stddev=0.1, dtype=tf.float32), name="W_out")
            b_out = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                                name="b_out")
            l2_loss += tf.nn.l2_loss(W_out)
            l2_loss += tf.nn.l2_loss(b_out)
            self.scores = tf.nn.xw_plus_b(self.h_drop,
                                          W_out,
                                          b_out,
                                          name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   tf.float32),
                                           name="accuracy")
            self.correct_pred_num = tf.reduce_sum(tf.cast(
                correct_predictions, tf.int32),
                                                  name="correct_pred_num")