        def basic_lstm_model(inputs):
            print "Loading basic lstm model.."
            for i in range(self.config.rnn_numLayers):
                with tf.variable_scope('rnnLayer' + str(i)):
                    lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
                    outputs, _ = tf.nn.dynamic_rnn(
                        lstm_cell,
                        inputs,
                        self.ph_seqLen,  # sequence_length, shape (b_sz,)
                        dtype=tf.float32,
                        swap_memory=True,
                        scope='basic_lstm_model_layer-' + str(i))
                    inputs = outputs  #b_sz, tstp, h_sz
            mask = mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = reduce_avg(outputs,
                                         mask,
                                         tf.expand_dims(self.ph_seqLen, 1),
                                         dim=-2)  #b_sz, h_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.hidden_size])

            for i in range(self.config.fnn_numLayers):
                inputs = rnn.rnn_cell._linear(inputs,
                                              self.config.hidden_size,
                                              bias=True,
                                              scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = rnn.rnn_cell._linear(aggregate_state,
                                          self.config.class_num,
                                          bias=True,
                                          scope='fnn_softmax')
            return logits
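
        # mkMask and reduce_avg are helpers defined elsewhere in the project.  A minimal
        # sketch of what they likely do, judging from how they are called here (names kept,
        # bodies are assumptions, not the original implementations):
        #
        #   def mkMask(seqLen, maxLen):
        #       # boolean mask of shape (b_sz, maxLen): True for valid positions
        #       return tf.sequence_mask(seqLen, maxLen)
        #
        #   def reduce_avg(tensor, mask, lengths, dim):
        #       # length-normalized mean over `dim`, ignoring masked positions
        #       masked = tensor * tf.cast(mask, tensor.dtype)
        #       return tf.reduce_sum(masked, axis=dim) / (tf.cast(lengths, tensor.dtype) + 1e-12)
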
        def basic_cnn_model(inputs):
            in_channel = self.config.embed_size
            filter_sizes = self.config.filter_sizes
            out_channel = self.config.num_filters
            input = inputs
            for layer in range(self.config.cnn_numLayers):
                with tf.name_scope("conv-layer-" + str(layer)):
                    conv_outputs = []
                    for i, filter_size in enumerate(filter_sizes):
                        with tf.variable_scope("conv-maxpool-%d" %
                                               filter_size):
                            # Convolution Layer
                            filter_shape = [
                                filter_size, in_channel, out_channel
                            ]
                            W = tf.get_variable(name='W', shape=filter_shape)
                            b = tf.get_variable(name='b', shape=[out_channel])
                            conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                                input,
                                W,
                                stride=1,
                                padding="SAME",
                                name="conv")
                            # Apply nonlinearity
                            h = tf.nn.relu(tf.nn.bias_add(conv, b),
                                           name="relu")
                            conv_outputs.append(h)
                    input = tf.concat(
                        2, conv_outputs
                    )  #b_sz, tstp, out_channel*len(filter_sizes)
                    in_channel = out_channel * len(filter_sizes)
            # Maxpooling
#             mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32) #(b_sz, tstp)
            mask = mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            pooled = tf.reduce_max(
                input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
                [1])  #(b_sz, out_channel*len(filter_sizes))
            #size (b_sz, out_channel*len(filter_sizes))
            inputs = tf.reshape(pooled,
                                shape=[b_sz, out_channel * len(filter_sizes)])

            for i in range(self.config.fnn_numLayers):
                inputs = rnn.rnn_cell._linear(inputs,
                                              self.config.embed_size,
                                              bias=True,
                                              scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = rnn.rnn_cell._linear(aggregate_state,
                                          self.config.class_num,
                                          bias=True,
                                          scope='fnn_softmax')
            return logits
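
        # Shape walk-through for the conv stack above, under illustrative config values
        # (e.g. filter_sizes=[3, 4, 5], num_filters=128; these numbers are assumptions,
        # not the original configuration):
        #   layer 0: in_channel = embed_size, each conv1d -> (b_sz, tstp, 128),
        #            concat over channels -> (b_sz, tstp, 384)
        #   layer 1+: in_channel = 384, concat again -> (b_sz, tstp, 384)
        # Masked max-pooling over time then yields a (b_sz, 384) feature vector that
        # feeds the tanh FNN layers and the final softmax projection.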

# Example #3
def masked_reverse_routing_iter(caps_uhat, seqLen, iter_num):
    '''

    Args:
        caps_uhat:  shape(b_sz, tstp, out_caps_num, out_caps_dim)
        seqLen:     shape(b_sz)
        iter_num:   number of routing iterations

    Returns:
        V_ret:      shape(b_sz, out_caps_num, out_caps_dim)
        S_ret:      shape(b_sz, out_caps_num, out_caps_dim), the un-squashed capsule sum
    '''
    assert iter_num > 0
    b_sz = tf.shape(caps_uhat)[0]
    tstp = tf.shape(caps_uhat)[1]
    out_caps_num = int(caps_uhat.get_shape()[2])

    seqLen = tf.where(tf.equal(seqLen, 0), tf.ones_like(seqLen), seqLen)
    mask = mkMask(seqLen, tstp)  # shape(b_sz, tstp)
    mask = tf.tile(
        tf.expand_dims(mask, axis=-1),  # shape(b_sz, tstp, 1)
        multiples=[1, 1, out_caps_num])  # shape(b_sz, tstp, out_caps_num)
    B = tf.zeros([b_sz, tstp, out_caps_num], dtype=tf.float32)
    B = tf.where(mask, B, tf.ones_like(B) * _MIN_NUM)
    for i in range(iter_num):
        C = tf.nn.softmax(B, axis=1)  # shape(b_sz, tstp, out_caps_num)
        C = tf.expand_dims(C, axis=-1)  # shape(b_sz, tstp, out_caps_num, 1)
        weighted_uhat = C * caps_uhat  # shape(b_sz, tstp, out_caps_num, out_caps_dim)

        S = tf.reduce_sum(weighted_uhat,
                          axis=1)  # shape(b_sz, out_caps_num, out_caps_dim)

        V = _squash(S, axes=[2])  # shape(b_sz, out_caps_num, out_caps_dim)
        V = tf.expand_dims(
            V, axis=1)  # shape(b_sz, 1, out_caps_num, out_caps_dim)
        B = tf.reduce_sum(caps_uhat * V,
                          axis=-1) + B  # shape(b_sz, tstp, out_caps_num)

    V_ret = tf.squeeze(V, axis=[1])  # shape(b_sz, out_caps_num, out_caps_dim)
    S_ret = S
    return V_ret, S_ret
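

# _squash is used by both routing functions in this listing but is not shown here.
# A minimal sketch of the standard capsule squashing nonlinearity it presumably
# implements (the original may differ, e.g. in its epsilon handling):
def _squash(in_caps, axes):
    '''Shrink vectors along `axes` to length in (0, 1) while keeping their direction.'''
    sq_norm = tf.reduce_sum(tf.square(in_caps), axis=axes, keep_dims=True)
    scale = sq_norm / (1.0 + sq_norm) / tf.sqrt(sq_norm + 1e-9)
    return in_caps * scale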

# Example #4
def masked_routing_iter(caps_uhat, seqLen, iter_num, caps_ihat=None, w_rr=None):
    '''

    Args:
        caps_uhat:  shape(b_sz, tstp, out_caps_num, out_caps_dim)
        seqLen:     shape(b_sz)
        iter_num:   number of routing iterations
        caps_ihat:  optional; if given, contributes an extra weighted term to the routing logits
        w_rr:       optional weight used together with caps_ihat

    Returns:
        V_ret:      shape(b_sz, out_caps_num, out_caps_dim)
        S_ret:      shape(b_sz, out_caps_num, out_caps_dim), the un-squashed capsule sum
        C_ret:      shape(iter_num, b_sz, tstp, out_caps_num), coupling coefficients per iteration
        B_logits:   shape(b_sz, tstp, out_caps_num), routing logits at the start of the final iteration
    '''
    assert iter_num > 0
    b_sz = tf.shape(caps_uhat)[0]
    tstp = tf.shape(caps_uhat)[1]
    out_caps_num = int(caps_uhat.get_shape()[2])
    seqLen = tf.where(tf.equal(seqLen, 0), tf.ones_like(seqLen), seqLen)
    mask = mkMask(seqLen, tstp)  # shape(b_sz, tstp)
    floatmask = tf.cast(tf.expand_dims(mask, axis=-1), dtype=tf.float32)  # shape(b_sz, tstp, 1)
    B = tf.zeros([b_sz, tstp, out_caps_num], dtype=tf.float32)
    C_list = list()
    for i in range(iter_num):
        B_logits = B
        C = tf.nn.softmax(B, axis=2)  # shape(b_sz, tstp, out_caps_num)
        C = tf.expand_dims(C * floatmask, axis=-1)  # shape(b_sz, tstp, out_caps_num, 1)
        weighted_uhat = C * caps_uhat  # shape(b_sz, tstp, out_caps_num, out_caps_dim)
        C_list.append(C)
        S = tf.reduce_sum(weighted_uhat, axis=1)  # shape(b_sz, out_caps_num, out_caps_dim)
        V = _squash(S, axes=[2])  # shape(b_sz, out_caps_num, out_caps_dim)
        V = tf.expand_dims(V, axis=1)  # shape(b_sz, 1, out_caps_num, out_caps_dim)
        if caps_ihat is None:
            B = tf.reduce_sum(caps_uhat * V, axis=-1) + B  # shape(b_sz, tstp, out_caps_num)
        else:
            B = tf.reduce_sum(caps_uhat * V, axis=-1) + 0.1 * tf.squeeze(
                tf.matmul(tf.matmul(caps_uhat, tf.tile(w_rr, [tf.shape(caps_uhat)[0], tf.shape(caps_uhat)[1], 1, 1])),
                          tf.tile(caps_ihat, [1, tf.shape(caps_uhat)[1], 1, 1])),
                axis=-1) + B  # shape(b_sz, tstp, out_caps_num)
    V_ret = tf.squeeze(V, axis=[1])  # shape(b_sz, out_caps_num, out_caps_dim)
    S_ret = S
    C_ret = tf.squeeze(tf.stack(C_list), axis=[4])
    return V_ret, S_ret, C_ret, B_logits
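

# A minimal usage sketch for masked_routing_iter (shapes and values below are
# illustrative assumptions, not taken from the original training code):
#
#   caps_uhat = tf.random_normal([32, 20, 5, 16])    # (b_sz, tstp, out_caps_num, out_caps_dim)
#   seqLen = tf.constant([20] * 32, dtype=tf.int32)  # every sequence uses all 20 steps
#   V, S, C, B_logits = masked_routing_iter(caps_uhat, seqLen, iter_num=3)
#   # V: (32, 5, 16)    routed capsule outputs
#   # C: (3, 32, 20, 5) coupling coefficients, one slice per routing iteration
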
        def basic_cbow_model(inputs):
            mask = mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = reduce_avg(inputs,
                                         mask,
                                         tf.expand_dims(self.ph_seqLen, 1),
                                         dim=-2)  #b_sz, emb_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.embed_size])

            for i in range(self.config.fnn_numLayers):
                inputs = rnn.rnn_cell._linear(inputs,
                                              self.config.embed_size,
                                              bias=True,
                                              scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = rnn.rnn_cell._linear(aggregate_state,
                                          self.config.class_num,
                                          bias=True,
                                          scope='fnn_softmax')
            return logits

# Example #6
    def basic_Centality(in_x, xLen, config, is_train, scope=None):
        '''

        :param in_x:    shape(b_sz, xlen, h_sz)
        :param xLen:    shape(b_sz,), valid lengths for in_x
        :param config:  dict with keys 'units', 'center-mode', 'max-iter', 'grad-iter', 'power-eta'
        :param is_train: scalar bool tensor; selects the power-iteration budget
        :param scope:   variable scope name
        :return:        (aggregated, time, stop); aggregated has shape(b_sz, h_sz)
        '''
        def PowerEigen(matrix, max_iter, eta):
            '''

            :param matrix: shape(b_sz, dim, dim)
            :return: (time, stop, eigenValue, v), where v has shape(b_sz, dim, 1)

            Power method without gradient; it runs with back_prop=False so it does not
            store intermediate states, which saves memory.
            '''
            b_sz = tf.shape(matrix)[0]
            dim = tf.shape(matrix)[1]

            def body(time, _, y):
                '''

                :param y: shape(b_sz, dim, 1)
                :return out:     shape(b_sz, dim, 1)
                '''
                v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) +
                         EPSILON)
                y = tf.matmul(matrix, v)  # shape(b_sz, dim, 1)
                theta = tf.matmul(v, y, transpose_a=True)  # shape(b_sz, 1, 1)
                stop = tf.less(tf.reduce_sum((y - theta * v)**2, axis=1),
                               eta *
                               tf.squeeze(theta, axis=1)**2)  # shape(b_sz, 1)
                stop = tf.squeeze(stop, axis=1)  # shape(b_sz)
                acc = tf.cast(tf.logical_not(stop), dtype=time.dtype)

                assert_inf = tf.Assert(tf.reduce_all(tf.is_finite(y)),
                                       ['y inf print v', v],
                                       summarize=100000)
                assert_nan = tf.Assert(tf.logical_not(
                    tf.reduce_any(tf.is_nan(y))), ['y Nan print v', v],
                                       summarize=100000)
                with tf.control_dependencies([assert_inf, assert_nan]):
                    y = tf.identity(y)
                return time + acc, stop, y,

            def stop_cond(time, stop, _):
                '''

                :param time: shape(b_sz) int32
                :param stop: shape(b_sz,) bool
                :param _:
                :return:
                '''
                return tf.logical_and(tf.reduce_all(tf.less(time, max_iter)),
                                      tf.reduce_any(tf.logical_not(stop)))

            zero_step = tf.zeros([b_sz], dtype=tf.int32)
            stop = tf.zeros(shape=[b_sz], dtype=tf.bool)  # shape(b_sz,)
            init_eig = tf.random_normal([b_sz, dim, 1],
                                        dtype=matrix.dtype)**2 + EPSILON
            # init_eig = tf.ones([b_sz, dim, 1], dtype=matrix.dtype)

            time, stop, y = tf.while_loop(
                stop_cond,
                body=body,
                loop_vars=[zero_step, stop, init_eig],
                back_prop=False,
                swap_memory=True)  # shape(b_sz, dim, 1)
            v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) +
                     EPSILON)

            eigenValue = tf.reduce_mean(tf.matmul(matrix, v) / v,
                                        axis=1)  # shape(b_sz, 1)

            tf.summary.histogram("converge-time", time)
            return time, stop, eigenValue, v  # shape(b_sz, dim, 1)
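
        # Convergence test used in body() above: with theta = v^T A v (the Rayleigh
        # quotient), iteration stops for an example once ||A v - theta v||^2 < eta * theta^2,
        # i.e. once (v, theta) is a good approximate eigenpair relative to eta.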

        def PowerEigen_grad_step(matrix, max_iter, eigenVector):
            '''

            :param matrix: shape(b_sz, dim, dim)
            :param max_iter: gradient iteration number
            :param eigenVector: the converged eigen vector
            :return: (time, eigenValue, v), where v has shape(b_sz, 1, dim)

            Power method with gradient; a small number of steps is sufficient to get a
            good approximation of the gradient.
            '''
            b_sz = tf.shape(matrix)[0]
            dim = tf.shape(matrix)[1]

            def body(time, y):
                '''

                :param y: shape(b_sz, dim, 1)
                :return out:     shape(b_sz, dim, 1)
                '''
                v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) +
                         EPSILON)
                y = tf.matmul(matrix, v)  # shape(b_sz, dim, 1)
                return time + 1, y

            def stop_cond(time, _):
                '''

                :param time: scalar int32 step counter
                :param _: unused
                :return: bool, True while time < max_iter
                '''
                return tf.less(time, max_iter)

            zero_step = tf.constant(0, dtype=tf.int32)

            time, y = tf.while_loop(stop_cond,
                                    body=body,
                                    loop_vars=[zero_step, eigenVector],
                                    swap_memory=True)  # shape(b_sz, dim, 1)
            v = y / (tf.sqrt(tf.reduce_sum(y**2, axis=1, keep_dims=True)) +
                     EPSILON)
            eigenValue = tf.reduce_mean(tf.matmul(matrix, v) / v,
                                        axis=1)  # shape(b_sz, 1)

            return time, eigenValue, tf.transpose(
                v, perm=[0, 2, 1])  # (shape(b_sz, 1), shape(b_sz, 1, dim))

        def connect_fnn(in_x):
            '''

            :param in_x: shape(b_sz, xLen, h_sz)
            :return:
            '''
            h_sz = int(in_x.get_shape()[-1])
            units = config['units']
            left = tf.layers.dense(in_x, units=units,
                                   name='left-fnn')  #shape(b_sz, xLen, h_sz)
            right = tf.layers.dense(
                in_x, units=units, name='right-fnn')  # shape(b_sz, xLen, h_sz)
            fnn_concat_mlp = tf.nn.tanh(
                tf.expand_dims(left, axis=1) + tf.expand_dims(right, axis=2))
            fnn_concat_mlp = tf.layers.dense(fnn_concat_mlp,
                                             units=1,
                                             name='concat-mlp')
            connectivity = fnn_concat_mlp
            connectivity = tf.squeeze(connectivity, axis=-1)
            return connectivity

        h_sz = int(in_x.get_shape()[-1])
        maxLen = tf.shape(in_x)[1]
        b_sz = tf.shape(in_x)[0]
        mask = tf.expand_dims(mkMask(xLen, maxLen),
                              axis=2)  # shape(b_sz, xlen, 1)
        mask = tf.logical_and(mask,
                              tf.transpose(mask,
                                           perm=[0, 2, 1
                                                 ]))  # shape(b_sz, xlen, xlen)

        with tf.variable_scope(scope or 'Centrality'):
            if config['center-mode'] == 'center':
                in_x_cent = in_x - tf.reduce_mean(in_x, axis=2, keepdims=True)
            elif config['center-mode'] == 'ln':
                in_x_cent = layers.layer_norm(in_x, begin_norm_axis=-1)
            elif config['center-mode'] == 'none':
                in_x_cent = in_x
            else:
                raise ValueError('center-mode: %s' % config['center-mode'])

            connectivity = connect_fnn(in_x_cent)

            masked_connected = tf.where(mask, connectivity,
                                        tf.ones_like(connectivity) * NINF)

            masked_connected = tf.nn.softmax(masked_connected, axis=1)

            masked_connected = tf.where(mask, masked_connected,
                                        tf.zeros_like(connectivity))

            time, stop, _, eigenVec = PowerEigen(
                masked_connected,
                tf.cond(is_train, lambda: config['max-iter'], lambda: 5000),
                eta=config['power-eta'])

            eigenVec = tf.stop_gradient(eigenVec)

            # eigenVec = tf.transpose(eigenVec, perm=[0, 2, 1])
            _, eigenValue, eigenVec = PowerEigen_grad_step(
                masked_connected,
                max_iter=config['grad-iter'],
                eigenVector=eigenVec)
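
            # Gradient trick: the converged eigenvector from PowerEigen is treated as a
            # constant (stop_gradient above), and only the few PowerEigen_grad_step
            # iterations after it are differentiated, keeping back-prop memory bounded.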

            attn_weights = tf.abs(eigenVec) / (tf.reduce_sum(
                tf.abs(eigenVec), axis=-1, keep_dims=True) + EPSILON)
            aggregated = tf.matmul(attn_weights,
                                   in_x_cent)  # shape(b_sz, 1, h_sz)
            aggregated = tf.squeeze(aggregated, axis=1)
            aggregated = layers.layer_norm(aggregated, begin_norm_axis=-1)

            return aggregated, time, stop
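
        # A self-contained NumPy sanity check of the power iteration used above
        # (illustrative only; not part of the model code):
        #
        #   import numpy as np
        #   A = np.random.rand(6, 6)
        #   A /= A.sum(axis=0, keepdims=True)   # column-stochastic, like the axis-1 softmax above
        #   v = np.random.rand(6, 1)
        #   for _ in range(100):
        #       v = A @ v
        #       v /= np.linalg.norm(v) + 1e-12
        #   theta = float(v.T @ A @ v)          # Rayleigh quotient, ~1.0 for a stochastic matrix
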
        def feed_back_lstm(inputs):
            def feed_back_net(inputs, seq_len, feed_back_steps):
                '''
                Args:
                    inputs: shape(b_sz, tstp, emb_sz)
                '''
                shape_of_input = tf.shape(inputs)
                b_sz = shape_of_input[0]
                h_sz = self.config.hidden_size
                tstp = shape_of_input[1]
                emb_sz = self.config.embed_size

                def body(time, prev_output, state_ta):
                    '''
                    Args:
                        prev_output: previous output shape(b_sz, tstp, hidden_size)
                    '''

                    prev_output = tf.reshape(prev_output,
                                             shape=[-1, h_sz
                                                    ])  #shape(b_sz*tstp, h_sz)
                    output_linear = tf.nn.rnn_cell._linear(
                        prev_output,
                        output_size=h_sz,  #shape(b_sz*tstp, h_sz)
                        bias=False,
                        scope='output_transformer')
                    output_linear = tf.reshape(
                        output_linear, shape=[b_sz, tstp,
                                              h_sz])  #shape(b_sz, tstp, h_sz)
                    output_linear = tf.tanh(
                        output_linear)  #shape(b_sz, tstp, h_sz)

                    rnn_input = tf.concat(2, [output_linear, inputs],
                                          name='concat_output_input'
                                          )  #shape(b_sz, tstp, h_sz+emb_sz)

                    cell = tf.nn.rnn_cell.BasicLSTMCell(h_sz)
                    cur_outputs, state = tf.nn.dynamic_rnn(cell,
                                                           rnn_input,
                                                           seq_len,
                                                           dtype=tf.float32,
                                                           swap_memory=True,
                                                           time_major=False,
                                                           scope='encoder')
                    state = tf.concat(1, state)
                    state_ta = state_ta.write(time, state)
                    return time + 1, cur_outputs, state_ta  #shape(b_sz, tstp, h_sz)

                def condition(time, *_):
                    return time < feed_back_steps

                state_ta = tf.TensorArray(dtype=tf.float32,
                                          dynamic_size=True,
                                          clear_after_read=True,
                                          size=0)
                initial_output = tf.zeros(shape=[b_sz, tstp, h_sz],
                                          dtype=inputs.dtype,
                                          name='initial_output')
                time = tf.constant(0, dtype=tf.int32)
                _, outputs, state_ta = tf.while_loop(
                    condition,
                    body, [time, initial_output, state_ta],
                    swap_memory=True)
                final_state = state_ta.read(state_ta.size() - 1)
                return final_state, outputs
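
            # Each pass of the while_loop above re-runs the LSTM over the whole sequence,
            # conditioned (via a tanh projection) on the previous pass's outputs; the loop
            # returns the last pass's state and outputs, and only the outputs are used below.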

            _, outputs = feed_back_net(inputs,
                                       self.ph_seqLen,
                                       feed_back_steps=10)

            mask = mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = reduce_avg(outputs,
                                         mask,
                                         tf.expand_dims(self.ph_seqLen, 1),
                                         dim=-2)  #b_sz, h_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.hidden_size])

            for i in range(self.config.fnn_numLayers):
                inputs = rnn.rnn_cell._linear(inputs,
                                              self.config.hidden_size,
                                              bias=True,
                                              scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = rnn.rnn_cell._linear(aggregate_state,
                                          self.config.class_num,
                                          bias=True,
                                          scope='fnn_softmax')
            return logits
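
        # How the logits from any of these model builders would typically be consumed
        # (a sketch under assumed placeholder/config names such as self.ph_labels and
        # self.config.lr, which are not shown in this snippet):
        #
        #   logits = basic_lstm_model(embedded_inputs)   # or basic_cnn_model / basic_cbow_model / feed_back_lstm
        #   loss = tf.reduce_mean(
        #       tf.nn.sparse_softmax_cross_entropy_with_logits(
        #           labels=self.ph_labels, logits=logits))
        #   train_op = tf.train.AdamOptimizer(self.config.lr).minimize(loss)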