Example #1
    def build(self, z, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            batch_size = tf.shape(z)[0]

            layers = [z]

            with tf.variable_scope("layer0"):
                layers.append(linear(layers[-1], 128))
                layers.append(tf.nn.relu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))

            with tf.variable_scope("layer1"):
                layers.append(linear(layers[-1], 4 * 4 * 64))
                layers.append(tf.nn.relu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))
                layers.append(tf.reshape(layers[-1], [-1, 4, 4, 64]))

            with tf.variable_scope("layer2"):
                layers.append(
                    deconv2d(layers[-1], [batch_size, 8, 8, 64],
                             d_h=self.stride,
                             d_w=self.stride,
                             k_h=self.kernel,
                             k_w=self.kernel))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))

            with tf.variable_scope("layer3"):
                layers.append(
                    deconv2d(layers[-1], [batch_size, 16, 16, 32],
                             d_h=self.stride,
                             d_w=self.stride,
                             k_h=self.kernel,
                             k_w=self.kernel))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))

            with tf.variable_scope("layer4"):
                layers.append(
                    deconv2d(layers[-1], [batch_size, 32, 32, 32],
                             d_h=self.stride,
                             d_w=self.stride,
                             k_h=self.kernel,
                             k_w=self.kernel))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))

            with tf.variable_scope("layer5"):
                layers.append(
                    deconv2d(layers[-1], [
                        batch_size, self.output_height, self.output_width,
                        self.output_depth
                    ],
                             d_h=self.stride,
                             d_w=self.stride,
                             k_h=self.kernel,
                             k_w=self.kernel))
                layers.append(tf.nn.sigmoid(layers[-1]))

            return layers[-1], layers
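The generator above maps z through two dense layers to a 4x4x64 tensor, then doubles the spatial resolution with each deconv2d until the output size is reached. A minimal sketch of the shape arithmetic, assuming self.stride == 2 and 'SAME' padding (both are assumptions, not shown in this snippet):

    # Each transposed convolution multiplies height and width by the stride
    # under 'SAME' padding, so the 4x4 map from layer1 grows to 8, 16, 32.
    size, stride = 4, 2
    for name in ("layer2", "layer3", "layer4"):
        size *= stride
        print(name, size)  # layer2 8, layer3 16, layer4 32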
Example #2
    def build(self, images, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            layers = [images]
            with tf.variable_scope("layer0"):
                layers.append(conv2d(
                    layers[-1],
                    32,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel,
                    sn_op=None))
                layers.append(lrelu(layers[-1]))
            with tf.variable_scope("layer1"):
                layers.append(conv2d(
                    layers[-1],
                    32,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel,
                    sn_op=None))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))
            with tf.variable_scope("layer2"):
                layers.append(conv2d(
                    layers[-1],
                    64,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel,
                    sn_op=None))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))
            with tf.variable_scope("layer3"):
                layers.append(conv2d(
                    layers[-1],
                    64,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel,
                    sn_op=None))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))
            with tf.variable_scope("layer4"):
                layers.append(linear(layers[-1], 128, sn_op=None))
                layers.append(lrelu(layers[-1]))
                layers.append(batch_norm()(layers[-1], train=train))
            with tf.variable_scope("layer5"):
                layers.append(linear(layers[-1], self.output_length))

            return layers[-1], layers
Example #3
 def forward(self,
             inputs: tf.Tensor,
             dim: int = None,
             scope: t.Union[str, tf.VariableScope] = None):
     scope = scope if scope else 'forward'
     kwargs = {
         'dim': dim if dim else self.project_dim,
         'keep_prob': self.keep_prob,
     }
     with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
         t = op.linear(inputs, scope='linear-1', **kwargs)
         t = op.linear(t, scope='linear-2', **kwargs)
         return t
Example #4
 def forward(self,
             inputs: tf.Tensor,
             dim: int = None,
             scope: t.Union[str, tf.VariableScope] = None):
     scope = scope if scope else 'forward'
     kwargs = {
         'dim': dim if dim else -1,
         'keep_prob': self.keep_prob,
         'weight_init': tf.truncated_normal_initializer(stddev=0.01)
     }
     with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
         t = op.linear(inputs, scope='linear-1', **kwargs)
         t = op.linear(t, scope='linear-2', **kwargs)
         return t
Example #5
    def build(self, image, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            layers = [image]
            with tf.variable_scope("layer0"):
                layers.append(conv2d(
                    layers[-1],
                    32,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel))
                layers.append(tf.nn.relu(layers[-1]))
            with tf.variable_scope("layer1"):
                layers.append(conv2d(
                    layers[-1],
                    32,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel))
                layers.append(tf.nn.relu(layers[-1]))
            with tf.variable_scope("layer2"):
                layers.append(conv2d(
                    layers[-1],
                    64,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel))
                layers.append(tf.nn.relu(layers[-1]))
            with tf.variable_scope("layer3"):
                layers.append(conv2d(
                    layers[-1],
                    64,
                    d_h=self.stride,
                    d_w=self.stride,
                    k_h=self.kernel,
                    k_w=self.kernel))
                layers.append(tf.nn.relu(layers[-1]))
            with tf.variable_scope("layer4"):
                layers.append(linear(layers[-1], 128))
            with tf.variable_scope("layer5-mean"):
                mean = linear(layers[-1], self.output_length)
            with tf.variable_scope("layer5-logvar"):
                logvar = linear(layers[-1], self.output_length)
            layers.append(mean)
            layers.append(logvar)

            return mean, logvar, layers
Example #6
    def build(self, z, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            layers = [z]

            for i in range(self.num_layers - 1):
                with tf.variable_scope("layer{}".format(i)):
                    layers.append(linear(layers[-1], self.l_dim))
                    layers.append(lrelu(layers[-1]))
            with tf.variable_scope("layer{}".format(self.num_layers - 1)):
                layers.append(linear(layers[-1], 2))
                logit = layers[-1]
                layers.append(tf.nn.softmax(layers[-1]))
                prob = layers[-1]

            return logit, prob, layers
Example #7
 def fact_impl1(self, scope, x):
     with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
         # NOTE: project to low-dimensional space
         if self.fact_proj_dim > 0:
             x = op.linear(x, dim=self.fact_proj_dim, activation_fn=None)
         input_dim = x.get_shape()[2]
         fact_wght = op.get_variable('fact_weight', shape=(input_dim))
         fact_bias = op.get_variable('fact_bias', shape=(1))
         fact_intr = op.get_variable('fact_inter',
                                     shape=(input_dim, self.fact_intr_dim))
     l = (tf.reduce_sum(x * tf.reshape(fact_wght, [1, 1, -1]), -1) +
          fact_bias)
     # shape: [batch, seq_len]
     intr_mat = tf.matmul(fact_intr, tf.matrix_transpose(fact_intr))
      # shape: [input_dim, input_dim]
     mask = tf.sequence_mask(tf.range(input_dim),
                             maxlen=input_dim,
                             dtype=tf.float32)
      # shape: [input_dim, input_dim]
     p = tf.reduce_sum(
         tf.matmul(tf.expand_dims(x, 2), tf.expand_dims(x, 3)) *
         # shape: [batch, seq_len, input_dim, input_dim]
         tf.expand_dims(tf.expand_dims(intr_mat, 0), 0),
         #tf.expand_dims(tf.expand_dims(intr_mat * mask, 0), 0),
         # shape: [1, 1, input_dim, input_dim]
         [2, 3])
     # shape: [batch, seq_len]
     return l + p
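The returned l + p is a degree-2 factorization machine score evaluated at every sequence position. A reconstruction in LaTeX, writing w for fact_weight, b for fact_bias, and V for fact_inter (so the interaction matrix is M = V V^T):

    y(x) = b + \sum_i w_i x_i + \sum_{i,j} \langle v_i, v_j \rangle \, x_i x_j

Note that with the mask line commented out, the interaction sum runs over all ordered pairs (i, j), including i = j, rather than the conventional i < j.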
Example #8
    def build(self, input_attribute, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            input_attribute = flatten(input_attribute)
            layers = [input_attribute]
            for i in range(self.num_layers - 1):
                with tf.variable_scope("layer{}".format(i)):
                    layers.append(linear(layers[-1], self.num_units))
                    layers.append(tf.nn.relu(layers[-1]))
                    # if (i > 0):
                    #    layers.append(batch_norm()(layers[-1], train=train))
            with tf.variable_scope("layer{}".format(self.num_layers - 1)):
                layers.append(linear(layers[-1], 1))
                # batch_size * 1
                layers.append(tf.squeeze(layers[-1], 1))
                # batch_size

            return layers[-1]
Example #9
    def build(self, image, train):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            layers = [image]
            with tf.variable_scope("layer0"):
                layers.append(
                    conv2d(layers[-1],
                           self.start_depth,
                           d_h=self.stride,
                           d_w=self.stride,
                           k_h=self.kernel,
                           k_w=self.kernel))
                layers.append(lrelu(layers[-1]))

            with tf.variable_scope("layer1"):
                layers.append(
                    conv2d(layers[-1],
                           self.start_depth * 2,
                           d_h=self.stride,
                           d_w=self.stride,
                           k_h=self.kernel,
                           k_w=self.kernel))
                layers.append(batch_norm()(layers[-1], train=train))
                layers.append(lrelu(layers[-1]))

            with tf.variable_scope("layer2"):
                layers.append(
                    conv2d(layers[-1],
                           self.start_depth * 4,
                           d_h=self.stride,
                           d_w=self.stride,
                           k_h=self.kernel,
                           k_w=self.kernel))
                layers.append(batch_norm()(layers[-1], train=train))
                layers.append(lrelu(layers[-1]))

            with tf.variable_scope("layer3"):
                layers.append(
                    conv2d(layers[-1],
                           self.start_depth * 8,
                           d_h=self.stride,
                           d_w=self.stride,
                           k_h=self.kernel,
                           k_w=self.kernel))
                layers.append(batch_norm()(layers[-1], train=train))
                layers.append(lrelu(layers[-1]))

            with tf.variable_scope("layer4"):
                layers.append(linear(layers[-1], self.output_length))

            return layers[-1], layers
Example #10
File: diin.py Project: jie-mei/NLI
 def self_attent(x, padded_len):
     t1 = tf.tile(tf.expand_dims(x, 2), [1, 1, tf.shape(x)[1], 1])
     t2 = tf.tile(tf.expand_dims(x, 1), [1, tf.shape(x)[1], 1, 1])
     # shape: [batch, seq_len, seq_len, encode_dim]
     t = tf.reshape(
         tf.concat([t1, t2, t1 * t2], 3),
         [batch_size, padded_len**2, 3 * self.encode_dim])
     # shape: [batch, seq_len^2, 3 * encode_dim]
     att = op.linear(t, dim=1, bias=None, activation_fn=None)
     # shape: [batch, seq_len^2, 1]
     att = tf.reshape(att, [batch_size, padded_len, padded_len])
     # shape: [batch, seq_len, seq_len]
     soft_align = tf.einsum('bik,bkj->bij', tf.nn.softmax(att), x)
     return op.gated_fuse(x, soft_align)
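The pairwise score here concatenates each pair of position vectors with their elementwise product before a single bias-free projection (bias=None above). In LaTeX, with w the weight vector learned by op.linear:

    A_{ij} = w^\top [\, x_i ;\; x_j ;\; x_i \odot x_j \,], \qquad
    \tilde{x}_i = \sum_j \mathrm{softmax}(A_{i\cdot})_j \, x_j

The einsum implements the second equation, and op.gated_fuse (not shown in this snippet) merges x with its aligned version.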
Example #11
    def fact_impl2(self, scope, x):
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            if self.fact_proj_dim > 0:
                x = op.linear(x, dim=self.fact_proj_dim, activation_fn=None)
            input_dim = x.get_shape()[2]
            fact_wght = op.get_variable('fact_weight', shape=(input_dim))
            fact_bias = op.get_variable('fact_bias', shape=(1))
            fact_intr = op.get_variable('fact_inter',
                                        shape=(input_dim, self.fact_intr_dim))
        l = (tf.reduce_sum(x * tf.reshape(fact_wght, [1, 1, -1]), -1) +
             fact_bias)
        # shape: [batch, seq_len]

        intr_mat = tf.matmul(fact_intr, tf.matrix_transpose(fact_intr))
        # shape: [input_dim, input_dim]
        mask = tf.sequence_mask(tf.range(input_dim),
                                maxlen=input_dim,
                                dtype=tf.float32)
        # shape: [input_dim, input_dim]

        i = tf.constant(0)
        x_shape = tf.shape(x)
        batch_size, seq_len = x_shape[0], x_shape[1]
        p = tf.reshape(tf.zeros([batch_size]), [batch_size, -1])

        def loop_cond(i, x, p, seq_len):
            return tf.less(i, seq_len)

        def loop_body(i, x, p, seq_len):
            x_vect = x[:, i]
            # shape: [batch, input_dim]
            #x_mat = tf.matmul(tf.expand_dims(x_vect, 1),
            #                  tf.expand_dims(x_vect, 2))
            # NOTE: Avoid Internal Error: Blas xGEMMBatched launch failed
            x_mat = (tf.tile(tf.expand_dims(x_vect, 1), [1, input_dim, 1]) *
                     tf.tile(tf.expand_dims(x_vect, 2), [1, 1, input_dim]))
            # shape: [batch, input_dim, input_dim]
            p_i = tf.reduce_sum(intr_mat * x_mat, [1, 2])
            p_i = tf.expand_dims(p_i, 1)
            # shape: [batch, 1]
            p = tf.concat([p, p_i], 1)
            # advance the loop counter; returning i unchanged would make
            # loop_cond never false and the while_loop spin forever
            return [i + 1, x, p, seq_len]

        _, _, p_loop, _ = tf.while_loop(loop_cond,
                                        loop_body, [i, x, p, seq_len],
                                        parallel_iterations=1)
        return l + p_loop[:, 1:]
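fact_impl2 produces the same second-order term as fact_impl1, one timestep at a time. A self-contained NumPy check of that equivalence (my own sketch, not from the source project):

    import numpy as np

    x = np.random.randn(2, 5, 3).astype(np.float32)  # [batch, seq_len, input_dim]
    V = np.random.randn(3, 4).astype(np.float32)     # [input_dim, fact_intr_dim]
    M = V @ V.T                                      # intr_mat

    p1 = np.einsum('bsi,ij,bsj->bs', x, M, x)        # fact_impl1-style reduction
    p2 = np.stack([np.einsum('bi,ij,bj->b', x[:, t], M, x[:, t])
                   for t in range(x.shape[1])], 1)   # fact_impl2-style loop
    assert np.allclose(p1, p2, atol=1e-4)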
Example #12
    def __init__(
        self,
        embeddings: embed.IndexedWordEmbedding,
        class_num: int,
        project_dim: int,
    ) -> None:
        super(ResModel, self).__init__()
        self._class_num = class_num
        self.project_dim = project_dim
        self.keep_prob = tf.placeholder(tf.float32, shape=[])

        def mask(x, x_len):
            # Explicitly mask the paddings.
            mask = tf.sequence_mask(x_len, tf.shape(x)[1], dtype=tf.float32)
            return tf.expand_dims(mask, -1)

        # mask1, mask2 = mask(self.x1, self.len1), mask(self.x2, self.len2)

        with tf.variable_scope('embed') as s:
            embed = tf.constant(embeddings.get_embeddings(),
                                dtype=tf.float32,
                                name='embeddings')
            x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])

        for i in range(1, 11):
            x1, x2 = self.layer(x1, x2, 'res-layer-%d' % i)

        with tf.variable_scope('aggregate') as s:
            v = tf.concat([
                tf.reduce_max(x1, axis=1),
                tf.reduce_max(x2, axis=1),
                tf.reduce_sum(x1, axis=1),
                tf.reduce_sum(x2, axis=1)
            ], 1)
            y_hat = self.forward(v)
            y_hat = op.linear(y_hat, dim=self._class_num, activation_fn=None)

        self.evaluate_and_loss(y_hat)
Example #13
 def linear_BNReLU(x, scope):
     with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
         x = op.linear(x, dim=self.project_dim, activation_fn=None)
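         # NOTE: tf.layers.batch_normalization defaults to training=False,
         # so this normalizes with moving statistics even during training,
         # unlike the batch_norm()(x, train=train) pattern used elsewhere.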
         x = tf.layers.batch_normalization(x)
         x = tf.nn.relu(x)
         return x
Example #14
    def build(self, image, train, sn_op):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            with tf.variable_scope("shared"):
                layers = [image]
                with tf.variable_scope("layer0"):
                    layers.append(
                        conv2d(layers[-1],
                               self.start_depth,
                               d_h=self.stride,
                               d_w=self.stride,
                               k_h=self.kernel,
                               k_w=self.kernel,
                               sn_op=sn_op))
                    layers.append(lrelu(layers[-1]))

                with tf.variable_scope("layer1"):
                    layers.append(
                        conv2d(layers[-1],
                               self.start_depth * 2,
                               d_h=self.stride,
                               d_w=self.stride,
                               k_h=self.kernel,
                               k_w=self.kernel,
                               sn_op=sn_op))
                    layers.append(lrelu(layers[-1]))

                with tf.variable_scope("layer2"):
                    layers.append(
                        conv2d(layers[-1],
                               self.start_depth * 4,
                               d_h=self.stride,
                               d_w=self.stride,
                               k_h=self.kernel,
                               k_w=self.kernel,
                               sn_op=sn_op))
                    layers.append(lrelu(layers[-1]))

                with tf.variable_scope("layer3"):
                    layers.append(
                        conv2d(layers[-1],
                               self.start_depth * 8,
                               d_h=self.stride,
                               d_w=self.stride,
                               k_h=self.kernel,
                               k_w=self.kernel,
                               sn_op=sn_op))

                    layers.append(lrelu(layers[-1]))

            with tf.variable_scope("x"):
                image_layers = [layers[-1]]
                with tf.variable_scope("layer0"):
                    image_layers.append(
                        linear(image_layers[-1], 1, sn_op=sn_op))
                    image_layers.append(tf.nn.sigmoid(image_layers[-1]))

            with tf.variable_scope("q"):
                q_layers = [layers[-1]]
                with tf.variable_scope("layer0"):
                    q_layers.append(
                        linear(q_layers[-1], self.output_length, sn_op=sn_op))

            layers.extend(image_layers)
            layers.extend(q_layers)

            return image_layers[-1], q_layers[-1], layers
Example #15
File: test.py Project: jie-mei/NLI
    def __init__(
        self,
        embeddings: embed.IndexedWordEmbedding,
        class_num: int,
        scale_l1: float = 0.0,
        scale_l2: float = 0.0,
        lstm_unit: int = 300,
    ) -> None:
        super(Test, self).__init__()
        self._class_num = class_num
        self.scale_l1 = scale_l1
        self.scale_l2 = scale_l2
        self.lstm_unit = lstm_unit

        self.batch_size = tf.shape(self.x1)[0]
        self.embed_dim = embeddings.get_embeddings().shape[-1]
        self.keep_prob = tf.placeholder(tf.float32, shape=[])
        self.op_var_kwargs = {
            'scale_l1': self.scale_l1,
            'scale_l2': self.scale_l2
        }
        self.op_kwargs = {
            'scale_l1': self.scale_l1,
            'scale_l2': self.scale_l2,
            'keep_prob': self.keep_prob,
            'drop_after': False
        }

        with tf.variable_scope('embed') as s:
            #embed_init_var = embeddings.get_embeddings()
            #embed = op.get_variable('embeddings',
            #        shape=embed_init_var.shape,
            #        initializer=tf.constant_initializer(embed_init_var))
            embed = tf.constant(embeddings.get_embeddings(),
                                dtype=tf.float32,
                                name='embeddings')
            embed_dim = embed.get_shape()[-1]
            x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])

        with tf.variable_scope('unfold', reuse=tf.AUTO_REUSE) as s:
            x1 = self.unfold_tree(x1, self.temp1, self.tag1, self.len1, 'x1')
            x2 = self.unfold_tree(x2, self.temp2, self.tag2, self.len2, 'x2')

        with tf.variable_scope('encode', reuse=tf.AUTO_REUSE) as s:
            x1, x2 = map(lambda x: tf.nn.dropout(x, self.keep_prob), [x1, x2])
            x1, x2 = map(self.bilstm, [x1, x2])
            # shape: [batch, seq_len, embed_dim * 2]

        with tf.variable_scope('attent') as s:
            sim = tf.matmul(x1, tf.matrix_transpose(x2))
            alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
            beta = tf.matmul(tf.nn.softmax(sim), x2)
            x1 = tf.concat([x1, beta, x1 * beta, x1 - beta], 2)
            x2 = tf.concat([x2, alpha, x2 * alpha, x2 - alpha], 2)
            # shape: [batch, seq_len, embed_dim * 8]

        with tf.variable_scope('decode', reuse=tf.AUTO_REUSE) as s:
            x1, x2 = map(lambda x: op.linear(x, embed_dim, **self.op_kwargs),
                         [x1, x2])
            # NOTE: dropout here in the author's code
            # shape: [batch, seq_len, embed_dim]
            x1, x2 = map(self.bilstm, [x1, x2])
            # shape: [batch, seq_len, embed_dim * 2]

        with tf.variable_scope('aggregate') as s:

            def pool(x):
                return tf.concat(
                    [tf.reduce_sum(x, axis=1),
                     tf.reduce_max(x, axis=1)], 1)

            y_hat = op.linear(tf.concat([pool(x1), pool(x2)], 1),
                              dim=embed_dim,
                              activation_fn=tf.nn.tanh,
                              scope='linear-1',
                              **self.op_kwargs)
            # shape: [batch, embed_dim * 8]
            y_hat = op.linear(y_hat,
                              dim=self._class_num,
                              activation_fn=None,
                              scope='linear-2',
                              **self.op_kwargs)
            # shape: [batch, class_num]

        self.evaluate_and_loss(y_hat)
Example #16
 def co_attent(t1, t2):
     t1 = op.linear(t1, **op_kwargs)
     t2 = op.linear(t2, **op_kwargs)
     return tf.matmul(t1, tf.matrix_transpose(t2))
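Compactly, with f the projection applied by op.linear (note both calls use op.linear's default scope, so under an enclosing AUTO_REUSE scope the two projections may share one set of weights):

    E = f(T_1) \, f(T_2)^\top, \qquad E \in \mathbb{R}^{B \times L_1 \times L_2}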
Example #17
    def __init__(self,
            embeddings: embed.IndexedWordEmbedding,
            class_num: int,
            project_dim: int = 500,
            ) -> None:
        super(AttentiveModel, self).__init__()
        self.project_dim = project_dim
        self._class_num = class_num

        self.keep_prob = tf.placeholder(tf.float32, shape=[])

        def mask(x, x_len):
            # Explicitly mask the paddings.
            mask = tf.sequence_mask(x_len, tf.shape(x)[1], dtype=tf.float32)
            return tf.expand_dims(mask, -1)
        mask1, mask2 = mask(self.x1, self.len1), mask(self.x2, self.len2)

        with tf.variable_scope('embed') as s:
            embed = tf.constant(embeddings.get_embeddings(),
                                dtype=tf.float32,
                                name='embeddings')
            x1, x2 = map(lambda x: tf.gather(embed, x), [self.x1, self.x2])

        x1, x2 = map(lambda x: op.linear(x, scope='project', dim=500), [x1, x2])
        x1, x2 = map(lambda x: op.highway(x, scope='highway-1', dim=500), [x1, x2])
        x1, x2 = map(lambda x: op.highway(x, scope='highway-2', dim=500), [x1, x2])

        with tf.variable_scope('attent') as s:
            def soft_align(att_fn, scope):
                with tf.variable_scope(scope) as s:
                    sim = att_fn(x1, x2)
                    alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
                    beta  = tf.matmul(tf.nn.softmax(sim), x2)
                    return alpha, beta
            a1, b1 = soft_align(self.attention_mul,  'mul')
            #a2, b2 = soft_align(self.attention_diff, 'diff')
            #a3, b3 = soft_align(self.attention_dist, 'dist')

        with tf.variable_scope('compare') as s:
            #v1 = self.forward(tf.concat([x1, b1], 2))
            #v2 = self.forward(tf.concat([x2, a1], 2))
            v1 = op.linear(tf.concat([x1, b1], 2), dim=500)
            v2 = op.linear(tf.concat([x2, a1], 2), dim=500)

        v1, v2 = map(lambda x: op.highway(x, scope='highway-3', dim=500), [v1, v2])
        v1, v2 = map(lambda x: op.highway(x, scope='highway-4', dim=500), [v1, v2])

        with tf.variable_scope('aggregate') as s:
            # CHANGE
            #def reduce_mean(x, x_len):
            #    return (tf.reduce_sum(x, axis=1) /
            #            tf.expand_dims(tf.cast(x_len, tf.float32), -1))
            def reduce_mean(x, x_len):
                return (tf.reduce_sum(x, axis=1) /
                        tf.cast(tf.shape(x)[1], tf.float32))
            v = tf.concat([
                    #reduce_mean(v1, self.len1),
                    #reduce_mean(v2, self.len2),
                    tf.reduce_max(v1, axis=1),
                    tf.reduce_max(v2, axis=1),
                    tf.reduce_sum(v1, axis=1),
                    tf.reduce_sum(v2, axis=1)
                    ], 1)
            y_hat = self.forward(v)
            y_hat = self.linear(y_hat, dim=self._class_num)

        self.evaluate_and_loss(y_hat)
Example #18
                def compute(i, state, last_output, all_output, gen_flag,
                            all_gen_flag, all_cur_argmax, last_cell_output):
                    input_all = [all_discrete_attribute]
                    if self.noise:
                        input_all.append(feature_input_noise_reshape[i])
                    if self.feed_back:
                        if feature_input_data_dim == 3:
                            input_all.append(feature_input_data_reshape[i])
                        else:
                            input_all.append(last_output)
                    input_all = tf.concat(input_all, axis=1)

                    cell_new_output, new_state = rnn_network(input_all, state)
                    new_output_all = []
                    id_ = 0
                    for j in range(self.sample_len):
                        for k in range(len(self.feature_outputs)):
                            with tf.variable_scope("output{}".format(id_),
                                                   reuse=tf.AUTO_REUSE):
                                output = self.feature_outputs[k]

                                sub_output = linear(cell_new_output,
                                                    output.dim)
                                if (output.type_ == OutputType.DISCRETE):
                                    sub_output = tf.nn.softmax(sub_output)
                                elif (output.type_ == OutputType.CONTINUOUS):
                                    if (output.normalization ==
                                            Normalization.ZERO_ONE):
                                        sub_output = tf.nn.sigmoid(sub_output)
                                    elif (output.normalization ==
                                          Normalization.MINUSONE_ONE):
                                        sub_output = tf.nn.tanh(sub_output)
                                    else:
                                        raise Exception("unknown normalization"
                                                        " type")
                                else:
                                    raise Exception("unknown output type")
                                new_output_all.append(sub_output)
                                id_ += 1
                    new_output = tf.concat(new_output_all, axis=1)

                    for j in range(self.sample_len):
                        all_gen_flag = all_gen_flag.write(
                            i * self.sample_len + j, gen_flag)
                        cur_gen_flag = tf.to_float(
                            tf.equal(
                                tf.argmax(new_output_all[(
                                    j * len(self.feature_outputs) +
                                    self.gen_flag_id)],
                                          axis=1), 0))
                        cur_gen_flag = tf.reshape(cur_gen_flag, [-1, 1])
                        all_cur_argmax = all_cur_argmax.write(
                            i * self.sample_len + j,
                            tf.argmax(
                                new_output_all[(j * len(self.feature_outputs) +
                                                self.gen_flag_id)],
                                axis=1))
                        gen_flag = gen_flag * cur_gen_flag

                    return (i + 1, new_state, new_output,
                            all_output.write(i, new_output), gen_flag,
                            all_gen_flag, all_cur_argmax, cell_new_output)
Example #19
    def build(self,
              attribute_input_noise,
              addi_attribute_input_noise,
              feature_input_noise,
              feature_input_data,
              train,
              attribute=None):
        with tf.variable_scope(self.scope_name, reuse=tf.AUTO_REUSE):
            batch_size = tf.shape(feature_input_noise)[0]

            if attribute is None:
                all_attribute = []
                all_discrete_attribute = []
                if len(self.addi_attribute_outputs) > 0:
                    all_attribute_input_noise = \
                        [attribute_input_noise,
                         addi_attribute_input_noise]
                    all_attribute_outputs = \
                        [self.real_attribute_outputs,
                         self.addi_attribute_outputs]
                    all_attribute_part_name = \
                        [self.STR_REAL, self.STR_ADDI]
                    all_attribute_out_dim = \
                        [self.real_attribute_out_dim,
                         self.addi_attribute_out_dim]
                else:
                    all_attribute_input_noise = [attribute_input_noise]
                    all_attribute_outputs = [self.real_attribute_outputs]
                    all_attribute_part_name = [self.STR_REAL]
                    all_attribute_out_dim = [self.real_attribute_out_dim]
            else:
                all_attribute = [attribute]
                all_discrete_attribute = [attribute]
                if len(self.addi_attribute_outputs) > 0:
                    all_attribute_input_noise = \
                        [addi_attribute_input_noise]
                    all_attribute_outputs = \
                        [self.addi_attribute_outputs]
                    all_attribute_part_name = \
                        [self.STR_ADDI]
                    all_attribute_out_dim = [self.addi_attribute_out_dim]
                else:
                    all_attribute_input_noise = []
                    all_attribute_outputs = []
                    all_attribute_part_name = []
                    all_attribute_out_dim = []

            for part_i in range(len(all_attribute_input_noise)):
                with tf.variable_scope("attribute_{}".format(
                        all_attribute_part_name[part_i]),
                                       reuse=tf.AUTO_REUSE):

                    if len(all_discrete_attribute) > 0:
                        layers = [
                            tf.concat([all_attribute_input_noise[part_i]] +
                                      all_discrete_attribute,
                                      axis=1)
                        ]
                    else:
                        layers = [all_attribute_input_noise[part_i]]

                    for i in range(self.attribute_num_layers - 1):
                        with tf.variable_scope("layer{}".format(i)):
                            layers.append(
                                linear(layers[-1], self.attribute_num_units))
                            layers.append(tf.nn.relu(layers[-1]))
                            layers.append(batch_norm()(layers[-1],
                                                       train=train))
                    with tf.variable_scope(
                            "layer{}".format(self.attribute_num_layers - 1),
                            reuse=tf.AUTO_REUSE):
                        part_attribute = []
                        part_discrete_attribute = []
                        for i in range(len(all_attribute_outputs[part_i])):
                            with tf.variable_scope("output{}".format(i),
                                                   reuse=tf.AUTO_REUSE):
                                output = all_attribute_outputs[part_i][i]

                                sub_output_ori = linear(layers[-1], output.dim)
                                if (output.type_ == OutputType.DISCRETE):
                                    sub_output = tf.nn.softmax(sub_output_ori)
                                    sub_output_discrete = tf.one_hot(
                                        tf.argmax(sub_output, axis=1),
                                        output.dim)
                                elif (output.type_ == OutputType.CONTINUOUS):
                                    if (output.normalization ==
                                            Normalization.ZERO_ONE):
                                        sub_output = tf.nn.sigmoid(
                                            sub_output_ori)
                                    elif (output.normalization ==
                                          Normalization.MINUSONE_ONE):
                                        sub_output = tf.nn.tanh(sub_output_ori)
                                    else:
                                        raise Exception("unknown normalization"
                                                        " type")
                                    sub_output_discrete = sub_output
                                else:
                                    raise Exception("unknown output type")
                                part_attribute.append(sub_output)
                                part_discrete_attribute.append(
                                    sub_output_discrete)
                        part_attribute = tf.concat(part_attribute, axis=1)
                        part_discrete_attribute = tf.concat(
                            part_discrete_attribute, axis=1)
                        part_attribute = tf.reshape(
                            part_attribute,
                            [batch_size, all_attribute_out_dim[part_i]])
                        part_discrete_attribute = tf.reshape(
                            part_discrete_attribute,
                            [batch_size, all_attribute_out_dim[part_i]])
                        # batch_size * dim

                    part_discrete_attribute = tf.stop_gradient(
                        part_discrete_attribute)

                    all_attribute.append(part_attribute)
                    all_discrete_attribute.append(part_discrete_attribute)

            all_attribute = tf.concat(all_attribute, axis=1)
            all_discrete_attribute = tf.concat(all_discrete_attribute, axis=1)
            all_attribute = tf.reshape(all_attribute,
                                       [batch_size, self.attribute_out_dim])
            all_discrete_attribute = tf.reshape(
                all_discrete_attribute, [batch_size, self.attribute_out_dim])

            with tf.variable_scope("feature", reuse=tf.AUTO_REUSE):
                all_cell = []
                for i in range(self.feature_num_layers):
                    with tf.variable_scope("unit{}".format(i),
                                           reuse=tf.AUTO_REUSE):
                        cell = tf.nn.rnn_cell.LSTMCell(
                            num_units=self.feature_num_units,
                            state_is_tuple=True)
                        all_cell.append(cell)
                rnn_network = tf.nn.rnn_cell.MultiRNNCell(all_cell)

                feature_input_data_dim = \
                    len(feature_input_data.get_shape().as_list())
                if feature_input_data_dim == 3:
                    feature_input_data_reshape = tf.transpose(
                        feature_input_data, [1, 0, 2])
                feature_input_noise_reshape = tf.transpose(
                    feature_input_noise, [1, 0, 2])
                # time * batch_size * ?

                if self.initial_state == RNNInitialStateType.ZERO:
                    initial_state = rnn_network.zero_state(
                        batch_size, tf.float32)
                elif self.initial_state == RNNInitialStateType.RANDOM:
                    initial_state = tf.random_normal(
                        shape=(self.feature_num_layers, 2, batch_size,
                               self.feature_num_units),
                        mean=0.0,
                        stddev=1.0)
                    initial_state = tf.unstack(initial_state, axis=0)
                    initial_state = tuple([
                        tf.nn.rnn_cell.LSTMStateTuple(initial_state[idx][0],
                                                      initial_state[idx][1])
                        for idx in range(self.feature_num_layers)
                    ])
                elif self.initial_state == RNNInitialStateType.VARIABLE:
                    initial_state = []
                    for i in range(self.feature_num_layers):
                        sub_initial_state1 = tf.get_variable(
                            "layer{}_initial_state1".format(i),
                            (1, self.feature_num_units),
                            initializer=tf.random_normal_initializer(
                                stddev=self.initial_stddev))
                        sub_initial_state1 = tf.tile(sub_initial_state1,
                                                     (batch_size, 1))
                        sub_initial_state2 = tf.get_variable(
                            "layer{}_initial_state2".format(i),
                            (1, self.feature_num_units),
                            initializer=tf.random_normal_initializer(
                                stddev=self.initial_stddev))
                        sub_initial_state2 = tf.tile(sub_initial_state2,
                                                     (batch_size, 1))
                        sub_initial_state = tf.nn.rnn_cell.LSTMStateTuple(
                            sub_initial_state1, sub_initial_state2)
                        initial_state.append(sub_initial_state)
                    initial_state = tuple(initial_state)
                else:
                    raise NotImplementedError

                time = feature_input_noise.get_shape().as_list()[1]
                if time is None:
                    time = tf.shape(feature_input_noise)[1]

                def compute(i, state, last_output, all_output, gen_flag,
                            all_gen_flag, all_cur_argmax, last_cell_output):
                    input_all = [all_discrete_attribute]
                    if self.noise:
                        input_all.append(feature_input_noise_reshape[i])
                    if self.feed_back:
                        if feature_input_data_dim == 3:
                            input_all.append(feature_input_data_reshape[i])
                        else:
                            input_all.append(last_output)
                    input_all = tf.concat(input_all, axis=1)

                    cell_new_output, new_state = rnn_network(input_all, state)
                    new_output_all = []
                    id_ = 0
                    for j in range(self.sample_len):
                        for k in range(len(self.feature_outputs)):
                            with tf.variable_scope("output{}".format(id_),
                                                   reuse=tf.AUTO_REUSE):
                                output = self.feature_outputs[k]

                                sub_output = linear(cell_new_output,
                                                    output.dim)
                                if (output.type_ == OutputType.DISCRETE):
                                    sub_output = tf.nn.softmax(sub_output)
                                elif (output.type_ == OutputType.CONTINUOUS):
                                    if (output.normalization ==
                                            Normalization.ZERO_ONE):
                                        sub_output = tf.nn.sigmoid(sub_output)
                                    elif (output.normalization ==
                                          Normalization.MINUSONE_ONE):
                                        sub_output = tf.nn.tanh(sub_output)
                                    else:
                                        raise Exception("unknown normalization"
                                                        " type")
                                else:
                                    raise Exception("unknown output type")
                                new_output_all.append(sub_output)
                                id_ += 1
                    new_output = tf.concat(new_output_all, axis=1)

                    for j in range(self.sample_len):
                        all_gen_flag = all_gen_flag.write(
                            i * self.sample_len + j, gen_flag)
                        cur_gen_flag = tf.to_float(
                            tf.equal(
                                tf.argmax(new_output_all[(
                                    j * len(self.feature_outputs) +
                                    self.gen_flag_id)],
                                          axis=1), 0))
                        cur_gen_flag = tf.reshape(cur_gen_flag, [-1, 1])
                        all_cur_argmax = all_cur_argmax.write(
                            i * self.sample_len + j,
                            tf.argmax(
                                new_output_all[(j * len(self.feature_outputs) +
                                                self.gen_flag_id)],
                                axis=1))
                        gen_flag = gen_flag * cur_gen_flag

                    return (i + 1, new_state, new_output,
                            all_output.write(i, new_output), gen_flag,
                            all_gen_flag, all_cur_argmax, cell_new_output)

                (i, state, _, feature, _, gen_flag, cur_argmax,
                 cell_output) = \
                    tf.while_loop(
                        lambda a, b, c, d, e, f, g, h:
                        tf.logical_and(a < time,
                                       tf.equal(tf.reduce_max(e), 1)),
                        compute,
                        (0,
                         initial_state,
                         feature_input_data if feature_input_data_dim == 2
                            else feature_input_data_reshape[0],
                         tf.TensorArray(tf.float32, time),
                         tf.ones((batch_size, 1)),
                         tf.TensorArray(tf.float32, time * self.sample_len),
                         tf.TensorArray(tf.int64, time * self.sample_len),
                         tf.zeros((batch_size, self.feature_num_units))))

                def fill_rest(i, all_output, all_gen_flag, all_cur_argmax):
                    all_output = all_output.write(
                        i, tf.zeros((batch_size, self.feature_out_dim)))

                    for j in range(self.sample_len):
                        all_gen_flag = all_gen_flag.write(
                            i * self.sample_len + j, tf.zeros((batch_size, 1)))
                        all_cur_argmax = all_cur_argmax.write(
                            i * self.sample_len + j,
                            tf.zeros((batch_size, ), dtype=tf.int64))
                    return (i + 1, all_output, all_gen_flag, all_cur_argmax)

                _, feature, gen_flag, cur_argmax = tf.while_loop(
                    lambda a, b, c, d: a < time, fill_rest,
                    (i, feature, gen_flag, cur_argmax))

                feature = feature.stack()
                # time * batch_size * (dim * sample_len)
                gen_flag = gen_flag.stack()
                # (time * sample_len) * batch_size * 1
                cur_argmax = cur_argmax.stack()

                gen_flag = tf.transpose(gen_flag, [1, 0, 2])
                # batch_size * (time * sample_len) * 1
                cur_argmax = tf.transpose(cur_argmax, [1, 0])
                # batch_size * (time * sample_len)
                length = tf.reduce_sum(gen_flag, [1, 2])
                # batch_size

                feature = tf.transpose(feature, [1, 0, 2])
                # batch_size * time * (dim * sample_len)
                gen_flag_t = tf.reshape(gen_flag,
                                        [batch_size, time, self.sample_len])
                # batch_size * time * sample_len
                gen_flag_t = tf.reduce_sum(gen_flag_t, [2])
                # batch_size * time
                gen_flag_t = tf.to_float(gen_flag_t > 0.5)
                gen_flag_t = tf.expand_dims(gen_flag_t, 2)
                # batch_size * time * 1
                gen_flag_t = tf.tile(gen_flag_t, [1, 1, self.feature_out_dim])
                # batch_size * time * (dim * sample_len)
                # zero out the parts after sequence ends
                feature = feature * gen_flag_t
                # integer division: a tf.reshape dimension must be an int
                feature = tf.reshape(feature, [
                    batch_size, time * self.sample_len,
                    self.feature_out_dim // self.sample_len
                ])
                # batch_size * (time * sample_len) * dim

            return feature, all_attribute, gen_flag, length, cur_argmax
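The zero-out step near the end of build masks every feature written after a sample's gen_flag has dropped to zero. A toy NumPy illustration of that multiply (my own sketch; the real code first tiles gen_flag_t up to the feature dimension, which broadcasting makes unnecessary here):

    import numpy as np

    # batch=2, time=3, feature_out_dim=2; sample 0 generates all 3 steps,
    # sample 1 stops after the first step.
    feature = np.arange(12, dtype=np.float32).reshape(2, 3, 2)
    gen_flag_t = np.array([[[1.], [1.], [1.]],
                           [[1.], [0.], [0.]]], dtype=np.float32)
    masked = feature * gen_flag_t  # rows past the sequence end become zeros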
Example #20
 def transit(x, v):
     gate = op.linear(x, activation_fn=tf.sigmoid, scope='gate')
     return v * gate + x * (1 - gate)
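transit is a highway-style gate: a sigmoid of a linear map of x interpolates elementwise between the transformed value v and the untouched input x. In LaTeX:

    g = \sigma(W x + b), \qquad \mathrm{transit}(x, v) = g \odot v + (1 - g) \odot x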
Example #21
    def __init__(
        self,
        embeddings: embed.IndexedWordEmbedding,
        class_num: int,
        scale_l1: float = 0.0,
        scale_l2: float = 0.000001,
        encode_dim: int = 300,
        fact_intr_dim: int = 10,
        fact_proj_dim: int = -1,
        char_filter_width: int = 5,
        char_embed_dim: int = 8,
        char_conv_dim: int = 100,
        lstm_unit: int = 300,
    ) -> None:
        super(CAFE, self).__init__()
        self._class_num = class_num
        self.scale_l1 = scale_l1
        self.scale_l2 = scale_l2
        self.encode_dim = encode_dim
        self.fact_proj_dim = fact_proj_dim
        self.fact_intr_dim = fact_intr_dim
        self.char_filter_width = char_filter_width
        self.char_embed_dim = char_embed_dim
        self.char_conv_dim = char_conv_dim
        self.lstm_unit = lstm_unit

        self.keep_prob = tf.placeholder(tf.float32, shape=[])

        op_kwargs = {
            'scale_l1': self.scale_l1,
            'scale_l2': self.scale_l2,
            'keep_prob': self.keep_prob
        }

        with tf.variable_scope('embed') as s:
            # Word pretrained embeddings (300D)
            word_embed = tf.constant(embeddings.get_embeddings(),
                                     dtype=tf.float32,
                                     name='word_embed')
            word_embed1, word_embed2 = map(lambda x: tf.gather(word_embed, x),
                                           [self.x1, self.x2])

            # Character convolutional embeddings (`char_conv_dim`D)
            char_embed = op.get_variable('char_embed',
                                         shape=(256, char_embed_dim))
            char_filter = op.get_variable('char_filter',
                                          shape=(1, self.char_filter_width,
                                                 self.char_embed_dim,
                                                 self.char_conv_dim))

            def embed_chars(x_char):
                embed = tf.gather(char_embed, x_char)
                # shape: [batch, seq_len, word_len, embed_dim]
                conv = tf.nn.conv2d(embed, char_filter, [1, 1, 1, 1], 'VALID')
                # shape: [batch, seq_len, word_len - filter_width + 1, conv_dim]
                return tf.reduce_max(conv, 2)
                # shape: [batch, seq_len, conv_dim]

            char_embed1, char_embed2 = map(embed_chars,
                                           [self.char1, self.char2])

            # Tag one-hot embeddings (72D)
            def embed_tags(x_ids, x_tags, x_len):
                x_tags *= tf.sequence_mask(x_len,
                                           tf.shape(x_tags)[1],
                                           dtype=tf.int32)
                # shape: [batch, seq_len]
                tag_embed = tf.one_hot(x_tags,
                                       data.SNLI.TAGS,
                                       dtype=tf.float32,
                                       name='char_embed')
                return tag_embed[:, :tf.shape(x_ids)[1]]

            tag_embed1, tag_embed2 = map(
                embed_tags,
                *zip((self.x1, self.tag1, self.len1),
                     (self.x2, self.tag2, self.len2)))

            # Merge embeddings
            x1 = tf.concat([word_embed1, char_embed1, tag_embed1], 2)
            x2 = tf.concat([word_embed2, char_embed2, tag_embed2], 2)

        with tf.variable_scope('encode') as s:

            def encode(x):
                x = op.highway(x,
                               scope='hw-1',
                               dim=self.encode_dim,
                               **op_kwargs)
                x = op.highway(x,
                               scope='hw-2',
                               dim=self.encode_dim,
                               **op_kwargs)
                return x

            x1, x2 = map(encode, [x1, x2])
            # shape: [batch, seq_len, encode_dim]

        with tf.variable_scope('attent') as s:
            # Alignment
            def co_attent(t1, t2):
                t1 = op.linear(t1, **op_kwargs)
                t2 = op.linear(t2, **op_kwargs)
                return tf.matmul(t1, tf.matrix_transpose(t2))
                # shape: [batch, seq_len1, seq_len2]

            with tf.variable_scope('inter-align') as s:
                att = co_attent(x1, x2)
                inter1 = tf.matmul(tf.nn.softmax(att), x2)
                inter2 = tf.matmul(tf.nn.softmax(tf.matrix_transpose(att)), x1)
            with tf.variable_scope('intra-align') as s:

                def self_attent(x):
                    att = co_attent(x, x)
                    return x * tf.reduce_sum(att, 2, keep_dims=True)

                intra1, intra2 = map(self_attent, [x1, x2])

            def align_fact(x, x_align, scope):
                with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
                    f1 = self.fact('fact-concat', tf.concat([x, x_align], 2))
                    f2 = self.fact('fact-sub', x - x_align)
                    f3 = self.fact('fact-mul', x * x_align)
                    return tf.stack([f1, f2, f3], 2)
                # shape: [batch, seq_len, 3]

            # TODO: variables may not be shared between different facts
            #x1 = tf.concat([x1, inter1, intra1], 2)
            #x2 = tf.concat([x2, inter2, intra2], 2)
            x1 = tf.concat([
                x1,
                align_fact(x1, inter1, 'inter'),
                align_fact(x1, intra1, 'intra')
            ], 2)
            x2 = tf.concat([
                x2,
                align_fact(x2, inter2, 'inter'),
                align_fact(x2, intra2, 'intra')
            ], 2)

        with tf.variable_scope('sequence', reuse=tf.AUTO_REUSE) as s:

            def lstm_encode(x):
                # shape: [batch, seq_len, encode_dim + 6]
                outputs, states = tf.nn.dynamic_rnn(
                    cell=tf.nn.rnn_cell.LSTMCell(self.lstm_unit),
                    inputs=x,
                    dtype=tf.float32)
                outputs = tf.nn.dropout(outputs, self.keep_prob)
                return outputs

            x1, x2 = map(lstm_encode, [x1, x2])

        with tf.variable_scope('pooling') as s:

            def pool(x):
                return tf.concat([
                    tf.reduce_max(x, axis=1),
                    tf.reduce_sum(x, axis=1),
                ], 1)
                # shape: [batch, dim]

            x1, x2 = map(pool, [x1, x2])

        with tf.variable_scope('decode') as s:
            x = tf.concat([x1, x2, x1 - x2, x1 * x2], 1)
            x = op.highway(x, scope='hw-1', dim=self.encode_dim, **op_kwargs)
            x = op.highway(x, scope='hw-2', dim=self.encode_dim, **op_kwargs)
            y_hat = op.linear(x, dim=self._class_num, activation_fn=None)

        self.evaluate_and_loss(y_hat)
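For reference, the character convolution in embed_chars collapses the word-length axis. A shape walk-through under the constructor defaults (char_filter_width=5, char_embed_dim=8, char_conv_dim=100), with made-up batch and length values:

    # embed:   [batch, seq_len, word_len, 8]        after tf.gather
    # conv:    [batch, seq_len, word_len - 4, 100]  1x5 filter, 'VALID' padding
    # output:  [batch, seq_len, 100]                after reduce_max over axis 2
    batch, seq_len, word_len = 32, 40, 16
    conv_len = word_len - 5 + 1  # 12 positions survive the 'VALID' convolution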
Example #22
File: esim.py Project: jie-mei/NLI
    def __init__(
        self,
        embeddings: embed.IndexedWordEmbedding,
        class_num: int,
        scale_l1: float = 0.0,
        scale_l2: float = 0.0,
        lstm_unit: int = 300,
        seq_len: int = 0,
        char_filter_width: int = 5,
        char_embed_dim: int = 8,
        char_conv_dim: int = 100,
        class_weights: t.List[float] = [1.1, 1, 1],
    ) -> None:
        super(ESIM, self).__init__()
        self._class_num = class_num
        self.class_weights = class_weights
        self.scale_l1 = scale_l1
        self.scale_l2 = scale_l2
        self.lstm_unit = lstm_unit
        self.seq_len = seq_len
        self.char_filter_width = char_filter_width
        self.char_embed_dim = char_embed_dim
        self.char_conv_dim = char_conv_dim

        op_kwargs = {
            'scale_l1': self.scale_l1,
            'scale_l2': self.scale_l2,
            'keep_prob': self.keep_prob,
            'drop_after': False
        }

        with tf.variable_scope('embed') as s:

            def set_seq_len(x):
                x_len = tf.shape(x)[1]
                return tf.cond(
                    tf.less(self.seq_len, x_len), lambda: x[:, :self.seq_len],
                    lambda: tf.pad(x, [[0, 0], [0, self.seq_len - x_len]]))

            if self.seq_len > 0:
                x1, x2 = map(set_seq_len, [self.x1, self.x2])
            else:
                x1, x2 = self.x1, self.x2

            #embed_init_var = embeddings.get_embeddings()
            #embed = op.get_variable('embeddings',
            #        shape=embed_init_var.shape,
            #        initializer=tf.constant_initializer(embed_init_var))

            #embed = tf.constant(embeddings.get_embeddings(),
            #                    dtype=tf.float32,
            #                    name='embeddings')
            #x1, x2 = map(lambda x: tf.gather(embed, x), [x1, x2])

            # Word pretrained embeddings (300D)
            word_embed = tf.constant(embeddings.get_embeddings(),
                                     dtype=tf.float32,
                                     name='word_embed')
            word_embed1, word_embed2 = map(lambda x: tf.gather(word_embed, x),
                                           [self.x1, self.x2])
            embed_dim = word_embed.get_shape()[-1]

            # Character convolutional embeddings (`char_conv_dim`D)
            char_embed = op.get_variable('char_embed',
                                         shape=(256, char_embed_dim))
            char_filter = op.get_variable('char_filter',
                                          shape=(1, self.char_filter_width,
                                                 self.char_embed_dim,
                                                 self.char_conv_dim))

            def embed_chars(x_char):
                embed = tf.gather(char_embed, x_char)
                # shape: [batch, seq_len, word_len, embed_dim]
                conv = tf.nn.conv2d(embed, char_filter, [1, 1, 1, 1], 'VALID')
                # shape: [batch, seq_len, word_len - filter_width + 1, conv_dim]
                return tf.reduce_max(conv, 2)
                # shape: [batch, seq_len, conv_dim]

            char_embed1, char_embed2 = map(embed_chars,
                                           [self.char1, self.char2])

            # Tag one-hot embeddings (72D)
            def embed_tags(x_ids, x_tags, x_len):
                x_tags *= tf.sequence_mask(x_len,
                                           tf.shape(x_tags)[1],
                                           dtype=tf.int32)
                # shape: [batch, seq_len]
                tag_embed = tf.one_hot(x_tags,
                                       data.SNLI.TAGS,
                                       dtype=tf.float32,
                                       name='char_embed')
                return tag_embed[:, :tf.shape(x_ids)[1]]

            tag_embed1, tag_embed2 = map(
                embed_tags,
                *zip((self.x1, self.tag1, self.len1),
                     (self.x2, self.tag2, self.len2)))

            # Merge embeddings
            #x1 = tf.concat([word_embed1, char_embed1, tag_embed1], 2)
            #x2 = tf.concat([word_embed2, char_embed2, tag_embed2], 2)
            x1 = tf.concat([word_embed1, char_embed1], 2)
            x2 = tf.concat([word_embed2, char_embed2], 2)

            x1 = self.unfold_tree(x1, self.temp1, self.tag1, self.len1, 'x1')
            x2 = self.unfold_tree(x2, self.temp2, self.tag2, self.len2, 'x2')

        with tf.variable_scope('encode', reuse=tf.AUTO_REUSE) as s:
            x1, x2 = map(lambda x: tf.nn.dropout(x, self.keep_prob), [x1, x2])
            x1, x2 = map(self.bilstm, [x1, x2])
            # shape: [batch, seq_len, embed_dim * 2]

        with tf.variable_scope('attent') as s:
            sim = tf.matmul(x1, tf.matrix_transpose(x2))
            alpha = tf.matmul(tf.nn.softmax(tf.matrix_transpose(sim)), x1)
            beta = tf.matmul(tf.nn.softmax(sim), x2)
            x1 = tf.concat([x1, beta, x1 * beta, x1 - beta], 2)
            x2 = tf.concat([x2, alpha, x2 * alpha, x2 - alpha], 2)
            # shape: [batch, seq_len, embed_dim * 8]

        with tf.variable_scope('decode', reuse=tf.AUTO_REUSE) as s:
            x1, x2 = map(lambda x: op.linear(x, dim=embed_dim, **op_kwargs),
                         [x1, x2])
            # NOTE: dropout here in the author's code
            # shape: [batch, seq_len, embed_dim]
            x1, x2 = map(self.bilstm, [x1, x2])
            # shape: [batch, seq_len, embed_dim * 2]

        with tf.variable_scope('aggregate') as s:

            def pool(x):
                return tf.concat(
                    [tf.reduce_sum(x, axis=1),
                     tf.reduce_max(x, axis=1)], 1)

            y_hat = op.linear(tf.concat([pool(x1), pool(x2)], 1),
                              dim=embed_dim,
                              activation_fn=tf.nn.tanh,
                              scope='linear-1',
                              **op_kwargs)
            # shape: [batch, embed_dim * 8]
            y_hat = op.linear(y_hat,
                              dim=self._class_num,
                              activation_fn=None,
                              scope='linear-2',
                              **op_kwargs)
            # shape: [batch, class_num]

        self.evaluate_and_loss(y_hat, self.class_weights)
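The attent block is the standard ESIM soft alignment (Chen et al., 2017). With e_{ij} the similarity between the i-th encoded token of the premise and the j-th of the hypothesis:

    e_{ij} = \bar{a}_i^\top \bar{b}_j, \qquad
    \beta_i = \sum_j \mathrm{softmax}_j(e_{i\cdot}) \, \bar{b}_j, \qquad
    \alpha_j = \sum_i \mathrm{softmax}_i(e_{\cdot j}) \, \bar{a}_i

which matches sim, beta, and alpha above; each sequence is then concatenated with its aligned counterpart, their product, and their difference.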
Example #23
 def linear(self, inputs: tf.Tensor, dim: int, bias=True):
     return op.linear(inputs, dim, activation_fn=None, bias=bias)