Example #1
    def build_model(self):
        '''
        build the MemNN model
        '''
        # the input.
        self.inputs = tf.placeholder(tf.int32, [None, None], name="inputs")
        self.last_inputs = tf.placeholder(tf.int32, [None], name="last_inputs")
        batch_size = tf.shape(self.inputs)[0]
        self.sequence_length = tf.placeholder(tf.int64, [None],
                                              name='sequence_length')
        self.lab_input = tf.placeholder(tf.int32, [None], name="lab_input")
        # the lookup dict.
        self.embe_dict = tf.Variable(self.pre_embedding,
                                     dtype=tf.float32,
                                     trainable=self.emb_up)
        self.pe_mask = tf.Variable(self.pre_embedding_mask,
                                   dtype=tf.float32,
                                   trainable=False)
        self.embe_dict *= self.pe_mask
        sent_bitmap = tf.ones_like(tf.cast(self.inputs, tf.float32))
        inputs = tf.nn.embedding_lookup(self.embe_dict,
                                        self.inputs,
                                        max_norm=1)
        lastinputs = tf.nn.embedding_lookup(self.embe_dict,
                                            self.last_inputs,
                                            max_norm=1)
        org_memory = inputs

        pool_out = pooler(org_memory,
                          'mean',
                          axis=1,
                          sequence_length=tf.cast(
                              tf.reshape(self.sequence_length,
                                         [batch_size, 1]), tf.float32))
        pool_out = tf.reshape(pool_out, [-1, self.hidden_size])

        attlayer = FwNnAttLayer(self.edim,
                                active=self.active,
                                stddev=self.stddev,
                                norm_type='none')
        attout, alph = attlayer.forward(org_memory, lastinputs, pool_out,
                                        sent_bitmap)
        attout = tf.reshape(attout, [-1, self.edim]) + pool_out
        self.alph = tf.reshape(alph, [batch_size, 1, -1])
        self.w1 = tf.Variable(tf.random_normal([self.edim, self.edim],
                                               stddev=self.stddev),
                              trainable=True)
        self.w2 = tf.Variable(tf.random_normal([self.edim, self.edim],
                                               stddev=self.stddev),
                              trainable=True)

        attout = tf.tanh(tf.matmul(attout, self.w1))
        lastinputs = tf.tanh(tf.matmul(lastinputs, self.w2))
        prod = attout * lastinputs
        sco_mat = tf.matmul(prod, self.embe_dict[1:], transpose_b=True)
        self.softmax_input = sco_mat
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=sco_mat, labels=self.lab_input)

        # the optimizer.
        self.params = tf.trainable_variables()
        self.optimize = super(Seq2SeqAttNN,
                              self).optimize_normal(self.loss, self.params)
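# Note (illustrative sketch, not part of the snippet above): sparse_softmax_cross_entropy_with_logits
# returns one loss value per example, shape [batch_size]; if the surrounding optimize_normal helper
# does not already reduce it, the usual pattern is a tf.reduce_mean. Self-contained example with
# hypothetical logits and labels:
import tensorflow as tf

logits = tf.random_normal([32, 1000])                          # hypothetical score matrix
labels = tf.random_uniform([32], maxval=1000, dtype=tf.int32)  # hypothetical item ids
per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
loss = tf.reduce_mean(per_example)                             # scalar training objective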
Example #2
    def __init__(self,
                 num_units,
                 n_input,
                 alpha,
                 sigma_rec=0,
                 activation='softplus',
                 w_rec_init='diag',
                 rng=None,
                 reuse=None,
                 name=None):
        super(LeakyRNNCell, self).__init__(_reuse=reuse, name=name)

        # Inputs must be 2-dimensional.
        # self.input_spec = base_layer.InputSpec(ndim=2)

        self._num_units = num_units
        self._w_rec_init = w_rec_init
        self._reuse = reuse

        if activation == 'softplus':
            self._activation = tf.nn.softplus
            self._w_in_start = 1.0
            self._w_rec_start = 0.5
        elif activation == 'tanh':
            self._activation = tf.tanh
            self._w_in_start = 1.0
            self._w_rec_start = 1.0
        elif activation == 'relu':
            self._activation = tf.nn.relu
            self._w_in_start = 1.0
            self._w_rec_start = 0.5
        elif activation == 'power':
            self._activation = lambda x: tf.square(tf.nn.relu(x))
            self._w_in_start = 1.0
            self._w_rec_start = 0.01
        elif activation == 'retanh':
            self._activation = lambda x: tf.tanh(tf.nn.relu(x))
            self._w_in_start = 1.0
            self._w_rec_start = 0.5
        else:
            raise ValueError('Unknown activation')
        self._alpha = alpha
        self._sigma = np.sqrt(2 / alpha) * sigma_rec
        if rng is None:
            self.rng = np.random.RandomState()
        else:
            self.rng = rng

        # Generate initialization matrix
        n_hidden = self._num_units
        w_in0 = (self.rng.randn(n_input, n_hidden) / np.sqrt(n_input) *
                 self._w_in_start)

        if self._w_rec_init == 'diag':
            w_rec0 = self._w_rec_start * np.eye(n_hidden)
        elif self._w_rec_init == 'randortho':
            w_rec0 = self._w_rec_start * tools.gen_ortho_matrix(n_hidden,
                                                                rng=self.rng)
        elif self._w_rec_init == 'randgauss':
            w_rec0 = (self._w_rec_start * self.rng.randn(n_hidden, n_hidden) /
                      np.sqrt(n_hidden))
        else:
            raise ValueError('Unknown w_rec_init')

        matrix0 = np.concatenate((w_in0, w_rec0), axis=0)

        self.w_rnn0 = matrix0
        self._initializer = tf.constant_initializer(matrix0, dtype=tf.float32)
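# The initializer above packs the input and recurrent weights into a single kernel of shape
# [n_input + n_hidden, n_hidden], matching RNN cells that concatenate [inputs, state] before
# the matmul. A quick shape check (illustrative sketch, assuming the 'diag' recurrent init):
import numpy as np

n_input, n_hidden = 3, 5
w_in0 = np.random.randn(n_input, n_hidden) / np.sqrt(n_input)  # input-to-hidden block
w_rec0 = 0.5 * np.eye(n_hidden)                                # 'diag' recurrent block
matrix0 = np.concatenate((w_in0, w_rec0), axis=0)
print(matrix0.shape)                                           # (8, 5)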
Example #3
    def _build_fused(self, hp):
        n_input = hp['n_input']
        n_rnn = hp['n_rnn']
        n_output = hp['n_output']

        self.x = tf.placeholder("float", [None, None, n_input])
        self.y = tf.placeholder("float", [None, None, n_output])
        if hp['loss_type'] == 'lsq':
            self.c_mask = tf.placeholder("float", [None, n_output])
        else:
            # Mask on time
            self.c_mask = tf.placeholder("float", [None])

        # Activation functions
        if hp['activation'] == 'power':
            f_act = lambda x: tf.square(tf.nn.relu(x))
        elif hp['activation'] == 'retanh':
            f_act = lambda x: tf.tanh(tf.nn.relu(x))
        elif hp['activation'] == 'relu+':
            f_act = lambda x: tf.nn.relu(x + tf.constant(1.))
        else:
            f_act = getattr(tf.nn, hp['activation'])

        # Recurrent activity
        if hp['rnn_type'] == 'LeakyRNN':
            n_in_rnn = self.x.get_shape().as_list()[-1]
            cell = LeakyRNNCell(n_rnn,
                                n_in_rnn,
                                hp['alpha'],
                                sigma_rec=hp['sigma_rec'],
                                activation=hp['activation'],
                                w_rec_init=hp['w_rec_init'],
                                rng=self.rng)
        elif hp['rnn_type'] == 'LeakyGRU':
            cell = LeakyGRUCell(n_rnn,
                                hp['alpha'],
                                sigma_rec=hp['sigma_rec'],
                                activation=f_act)
        elif hp['rnn_type'] == 'LSTM':
            cell = tf.contrib.rnn.LSTMCell(n_rnn, activation=f_act)

        elif hp['rnn_type'] == 'GRU':
            cell = tf.contrib.rnn.GRUCell(n_rnn, activation=f_act)
        else:
            raise NotImplementedError("""rnn_type must be one of LeakyRNN,
                    LeakyGRU, EILeakyGRU, LSTM, GRU
                    """)

        # Dynamic rnn with time major
        self.h, states = rnn.dynamic_rnn(cell,
                                         self.x,
                                         dtype=tf.float32,
                                         time_major=True)

        # Output
        with tf.variable_scope("output"):
            # Using default initialization `glorot_uniform_initializer`
            w_out = tf.get_variable('weights', [n_rnn, n_output],
                                    dtype=tf.float32)
            b_out = tf.get_variable('biases', [n_output],
                                    dtype=tf.float32,
                                    initializer=tf.constant_initializer(
                                        0.0, dtype=tf.float32))

        h_shaped = tf.reshape(self.h, (-1, n_rnn))
        y_shaped = tf.reshape(self.y, (-1, n_output))
        # y_hat_ shape (n_time*n_batch, n_unit)
        y_hat_ = tf.matmul(h_shaped, w_out) + b_out
        if hp['loss_type'] == 'lsq':
            # Least-square loss
            y_hat = tf.sigmoid(y_hat_)
            self.cost_lsq = tf.reduce_mean(
                tf.square((y_shaped - y_hat) * self.c_mask))
        else:
            y_hat = tf.nn.softmax(y_hat_)
            # Cross-entropy loss
            self.cost_lsq = tf.reduce_mean(
                self.c_mask * tf.nn.softmax_cross_entropy_with_logits(
                    labels=y_shaped, logits=y_hat_))

        self.y_hat = tf.reshape(y_hat, (-1, tf.shape(self.h)[1], n_output))
        y_hat_fix, y_hat_ring = tf.split(self.y_hat, [1, n_output - 1],
                                         axis=-1)
        self.y_hat_loc = tf_popvec(y_hat_ring)
Example #4
def cyclegan_generator_resnet(images,
                              arg_scope_fn=cyclegan_arg_scope,
                              num_resnet_blocks=6,
                              num_filters=64,
                              upsample_fn=cyclegan_upsample,
                              kernel_size=3,
                              tanh_linear_slope=0.0,
                              is_training=False):
    """Defines the cyclegan resnet network architecture.

  As closely as possible following
  https://github.com/junyanz/CycleGAN/blob/master/models/architectures.lua#L232

  FYI: This network requires the input height and width to be divisible by 4 in
  order to generate an output with the same shape as the input. A ValueError is
  raised if the input dimensions are known at graph construction time and are not
  multiples of 4; if they are unknown at graph construction time there is no such
  check, and you will hit a runtime error instead.

  Args:
    images: Input image tensor of shape [batch_size, h, w, 3].
    arg_scope_fn: Function to create the global arg_scope for the network.
    num_resnet_blocks: Number of ResNet blocks in the middle of the generator.
    num_filters: Number of filters of the first hidden layer.
    upsample_fn: Upsampling function for the decoder part of the generator.
    kernel_size: Size w or list/tuple [h, w] of the filter kernels for all inner
      layers.
    tanh_linear_slope: Slope of the linear function to add to the tanh over the
      logits.
    is_training: Whether the network is created in training mode or inference
      only mode. Not actually needed, just for compliance with other generator
      network functions.

  Returns:
    A `Tensor` representing the model output and a dictionary of model end
      points.

  Raises:
    ValueError: If the input height or width is known at graph construction time
      and not a multiple of 4.
  """
    # Neither dropout nor batch norm is used -> don't need is_training
    del is_training

    end_points = {}

    input_size = images.shape.as_list()
    height, width = input_size[1], input_size[2]
    if height and height % 4 != 0:
        raise ValueError('The input height must be a multiple of 4.')
    if width and width % 4 != 0:
        raise ValueError('The input width must be a multiple of 4.')
    num_outputs = input_size[3]

    if not isinstance(kernel_size, (list, tuple)):
        kernel_size = [kernel_size, kernel_size]

    kernel_height = kernel_size[0]
    kernel_width = kernel_size[1]
    pad_top = (kernel_height - 1) // 2
    pad_bottom = kernel_height // 2
    pad_left = (kernel_width - 1) // 2
    pad_right = kernel_width // 2
    paddings = np.array(
        [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
        dtype=np.int32)
    spatial_pad_3 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])

    with slim.arg_scope(arg_scope_fn()):

        ###########
        # Encoder #
        ###########
        with tf.variable_scope('input'):
            # 7x7 input stage
            net = tf.pad(tensor=images, paddings=spatial_pad_3, mode='REFLECT')
            net = slim.conv2d(net,
                              num_filters,
                              kernel_size=[7, 7],
                              padding='VALID')
            end_points['encoder_0'] = net

        with tf.variable_scope('encoder'):
            with slim.arg_scope([slim.conv2d],
                                kernel_size=kernel_size,
                                stride=2,
                                activation_fn=tf.nn.relu,
                                padding='VALID'):

                net = tf.pad(tensor=net, paddings=paddings, mode='REFLECT')
                net = slim.conv2d(net, num_filters * 2)
                end_points['encoder_1'] = net
                net = tf.pad(tensor=net, paddings=paddings, mode='REFLECT')
                net = slim.conv2d(net, num_filters * 4)
                end_points['encoder_2'] = net

        ###################
        # Residual Blocks #
        ###################
        with tf.variable_scope('residual_blocks'):
            with slim.arg_scope([slim.conv2d],
                                kernel_size=kernel_size,
                                stride=1,
                                activation_fn=tf.nn.relu,
                                padding='VALID'):
                for block_id in xrange(num_resnet_blocks):
                    with tf.variable_scope('block_{}'.format(block_id)):
                        res_net = tf.pad(tensor=net,
                                         paddings=paddings,
                                         mode='REFLECT')
                        res_net = slim.conv2d(res_net, num_filters * 4)
                        res_net = tf.pad(tensor=res_net,
                                         paddings=paddings,
                                         mode='REFLECT')
                        res_net = slim.conv2d(res_net,
                                              num_filters * 4,
                                              activation_fn=None)
                        net += res_net

                        end_points['resnet_block_%d' % block_id] = net

        ###########
        # Decoder #
        ###########
        with tf.variable_scope('decoder'):

            with slim.arg_scope([slim.conv2d],
                                kernel_size=kernel_size,
                                stride=1,
                                activation_fn=tf.nn.relu):

                with tf.variable_scope('decoder1'):
                    net = upsample_fn(net,
                                      num_outputs=num_filters * 2,
                                      stride=[2, 2])
                end_points['decoder1'] = net

                with tf.variable_scope('decoder2'):
                    net = upsample_fn(net,
                                      num_outputs=num_filters,
                                      stride=[2, 2])
                end_points['decoder2'] = net

        with tf.variable_scope('output'):
            net = tf.pad(tensor=net, paddings=spatial_pad_3, mode='REFLECT')
            logits = slim.conv2d(net,
                                 num_outputs, [7, 7],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 padding='valid')
            logits = tf.reshape(logits, _dynamic_or_static_shape(images))

            end_points['logits'] = logits
            end_points['predictions'] = tf.tanh(
                logits) + logits * tanh_linear_slope

    return end_points['predictions'], end_points
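# A minimal usage sketch (illustrative; assumes slim and the TF-GAN helpers referenced above,
# e.g. cyclegan_arg_scope and cyclegan_upsample, are importable). The 128x128 input satisfies
# the divisible-by-4 requirement from the docstring:
import tensorflow as tf

images = tf.placeholder(tf.float32, [1, 128, 128, 3])
outputs, end_points = cyclegan_generator_resnet(images, num_resnet_blocks=6)
print(outputs.shape)               # same shape as the input: (1, 128, 128, 3)
print(sorted(end_points.keys()))   # encoder_*, resnet_block_*, decoder*, logits, predictions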
Example #5
def gelu(x):
    return 0.5 * x * (
        1 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
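# For reference, this is the tanh approximation of GELU (Hendrycks & Gimpel, 2016):
#   GELU(x) = x * Phi(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
# A quick NumPy check against the exact form (illustrative sketch, not from the snippet):
import numpy as np
from scipy.special import erf

x = np.linspace(-3, 3, 7)
exact = 0.5 * x * (1 + erf(x / np.sqrt(2)))
approx = 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))
print(np.max(np.abs(exact - approx)))   # small; the approximation error stays well under 1e-2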
Example #6
    def __call__(self, x, prev_state):
        prev_read_vector_list = prev_state['read_vector_list']
        controller_input = tf.concat([x] + prev_read_vector_list, axis=-1)

        # Next, pass the controller_input through the controller (the RNN cell).
        controller_output = self.controller(controller_input)

        num_parameters_per_head = self.memory_vector_dim + 1
        total_parameter_num = num_parameters_per_head * self.head_num

        # Initialize the weight matrix and bias, and compute the head parameters
        weights = tf.Variable(
            tf.random_normal([
                controller_output.get_shape()[0],
                controller_output.get_shape()[2], total_parameter_num
            ],
                             stddev=0.35))
        biases = tf.Variable(tf.zeros([total_parameter_num]))
        parameters = tf.nn.xw_plus_b(controller_output, weights, biases)
        head_parameter_list = tf.split(parameters, self.head_num, axis=2)

        #previous read weight vector
        prev_w_r_list = prev_state['w_r_list']

        #previous memory
        prev_M = prev_state['M']

        #previous usage weight vector
        prev_w_u = prev_state['w_u']

        #previous index and least used weight vector
        prev_indices, prev_w_lu = self.least_used(prev_w_u)

        #read weight vector
        w_r_list = []

        #write weight vector
        w_w_list = []

        #key vector
        k_list = []

        # Now compute the parameters used for addressing, one set per head.
        for i, head_parameter in enumerate(head_parameter_list):
            with tf.variable_scope('addressing_head_%d' % i):

                #key vector
                k = tf.tanh(head_parameter[:, :, 0:self.memory_vector_dim],
                            name='k')

                #sig_alpha
                sig_alpha = tf.sigmoid(head_parameter[:, :, -1:],
                                       name='sig_alpha')

                #read weights
                w_r = self.read_head_addressing(k, prev_M)

                #write weights
                w_w = self.write_head_addressing(sig_alpha, prev_w_r_list[i],
                                                 prev_w_lu)

            w_r_list.append(w_r)
            w_w_list.append(w_w)
            k_list.append(k)

        #usage weight vector
        w_u = self.gamma * prev_w_u + tf.add_n(w_r_list) + tf.add_n(w_w_list)

        #update the memory
        M_ = prev_M * tf.expand_dims(
            1. - tf.one_hot(prev_indices[:, :, -1], self.memory_size), axis=3)

        #write operation
        M = M_
        with tf.variable_scope('writing'):
            for i in range(self.head_num):

                w = tf.expand_dims(w_w_list[i], axis=3)
                k = tf.expand_dims(k_list[i], axis=2)
                M = M + tf.matmul(w, k)

        # read operation
        read_vector_list = []
        with tf.variable_scope('reading'):
            for i in range(self.head_num):
                read_vector = tf.reduce_sum(
                    tf.expand_dims(w_r_list[i], axis=3) * M, axis=2)
                read_vector_list.append(read_vector)

        #controller output

        state = {
            'read_vector_list': read_vector_list,
            'w_r_list': w_r_list,
            'w_w_list': w_w_list,
            'w_u': w_u,
            'M': M,
        }

        self.step += 1
        return controller_output, state
Example #7
def create_generator(generator_inputs, generator_outputs_channels):
    layers = []

    print('encoder:')
    print(generator_inputs.shape)

    # encoder_1: [batch, 256, 256, in_channels] => [batch, 128, 128, ngf]
    with tf.variable_scope("encoder_1"):
        output = gen_conv(generator_inputs, a.ngf)
        layers.append(output)
        print(output.shape)

    layer_specs = [
        a.ngf * 2,  # encoder_2: [batch, 128, 128, ngf] => [batch, 64, 64, ngf * 2]
        a.ngf * 4,  # encoder_3: [batch, 64, 64, ngf * 2] => [batch, 32, 32, ngf * 4]
        a.ngf * 8,  # encoder_4: [batch, 32, 32, ngf * 4] => [batch, 16, 16, ngf * 8]
        a.ngf * 8,  # encoder_5: [batch, 16, 16, ngf * 8] => [batch, 8, 8, ngf * 8]
        a.ngf * 8,  # encoder_6: [batch, 8, 8, ngf * 8] => [batch, 4, 4, ngf * 8]
        a.ngf * 8,  # encoder_7: [batch, 4, 4, ngf * 8] => [batch, 2, 2, ngf * 8]
        a.ngf * 8,  # encoder_8: [batch, 2, 2, ngf * 8] => [batch, 1, 1, ngf * 8]
    ]

    for out_channels in layer_specs:
        with tf.variable_scope("encoder_%d" % (len(layers) + 1)):
            rectified = lrelu(layers[-1], 0.2)
            # [batch, in_height, in_width, in_channels] => [batch, in_height/2, in_width/2, out_channels]
            convolved = gen_conv(rectified, out_channels)
            output = batchnorm(convolved)
            layers.append(output)
            print(output.shape)

    print('decoder:')

    layer_specs = [
        (a.ngf * 8, 0.5),  # decoder_8: [batch, 1, 1, ngf * 8] => [batch, 2, 2, ngf * 8 * 2]
        (a.ngf * 8, 0.5),  # decoder_7: [batch, 2, 2, ngf * 8 * 2] => [batch, 4, 4, ngf * 8 * 2]
        (a.ngf * 8, 0.5),  # decoder_6: [batch, 4, 4, ngf * 8 * 2] => [batch, 8, 8, ngf * 8 * 2]
        (a.ngf * 8, 0.0),  # decoder_5: [batch, 8, 8, ngf * 8 * 2] => [batch, 16, 16, ngf * 8 * 2]
        (a.ngf * 4, 0.0),  # decoder_4: [batch, 16, 16, ngf * 8 * 2] => [batch, 32, 32, ngf * 4 * 2]
        (a.ngf * 2, 0.0),  # decoder_3: [batch, 32, 32, ngf * 4 * 2] => [batch, 64, 64, ngf * 2 * 2]
        (a.ngf, 0.0),      # decoder_2: [batch, 64, 64, ngf * 2 * 2] => [batch, 128, 128, ngf * 2]
    ]

    num_encoder_layers = len(layers)
    for decoder_layer, (out_channels, dropout) in enumerate(layer_specs):
        skip_layer = num_encoder_layers - decoder_layer - 1
        with tf.variable_scope("decoder_%d" % (skip_layer + 1)):
            if decoder_layer == 0:
                # first decoder layer doesn't have skip connections
                # since it is directly connected to the skip_layer
                input = layers[-1]
            else:
                input = tf.concat([layers[-1], layers[skip_layer]], axis=3)

            rectified = tf.nn.relu(input)
            # [batch, in_height, in_width, in_channels] => [batch, in_height*2, in_width*2, out_channels]
            output = gen_deconv(rectified, out_channels)
            output = batchnorm(output)

            if dropout > 0.0:
                output = tf.nn.dropout(output, keep_prob=1 - dropout)

            layers.append(output)
            print(output.shape)

    # decoder_1: [batch, 128, 128, ngf * 2] => [batch, 256, 256, generator_outputs_channels]
    with tf.variable_scope("decoder_1"):
        input = tf.concat([layers[-1], layers[0]], axis=3)
        rectified = tf.nn.relu(input)
        output = gen_deconv(rectified, generator_outputs_channels)
        output = tf.tanh(output)
        layers.append(output)
        print(output.shape)
    return layers[-1]
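# The decoder loop above wires each decoder block to the encoder block at the same
# resolution (U-Net style); a quick check of the implied mapping (illustrative sketch):
num_encoder_layers = 8
for decoder_layer in range(7):
    skip_layer = num_encoder_layers - decoder_layer - 1
    src = 'none (bottleneck)' if decoder_layer == 0 else 'encoder_%d' % (skip_layer + 1)
    print('decoder_%d skip input: %s' % (skip_layer + 1, src))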
Example #8
    def __call__(self, inputs, state, scope=None):
        """Run this RNN cell on inputs, starting from the given state.

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: `2-D Tensor` with shape `[batch_size, self.state_size]`.
      scope: optional cell scope.

    Returns:
      A pair containing:

      - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`.
      - New state: A single `2-D` tensor.
    """
        batch_size, hidden_size = inputs.shape
        fixed_arc = self._params.fixed_arc
        num_layers = len(fixed_arc) // 2
        prev_s = self.prev_s
        w_prev = self.w_prev
        w_skip = self.w_skip
        input_mask = self._input_mask
        layer_mask = self._layer_mask

        if layer_mask is not None:
            assert input_mask is not None
            ht = tf.matmul(
                tf.concat([inputs * input_mask, state * layer_mask], axis=1),
                w_prev)
        else:
            ht = tf.matmul(tf.concat([inputs, state], axis=1), w_prev)
        h, t = tf.split(ht, 2, axis=1)
        h = tf.tanh(h)
        t = tf.sigmoid(t)
        s = state + t * (h - state)
        layers = [s]

        def _select_function(h, function_id):
            if function_id == 0:
                return tf.tanh(h)
            elif function_id == 1:
                return tf.nn.relu(h)
            elif function_id == 2:
                return tf.sigmoid(h)
            elif function_id == 3:
                return h
            raise ValueError('Unknown func_idx {0}'.format(function_id))

        start_idx = 0
        used = np.zeros(num_layers + 1, dtype=np.float32)
        for layer_id in range(num_layers):
            prev_idx = fixed_arc[start_idx]
            func_idx = fixed_arc[start_idx + 1]
            prev_s = layers[prev_idx]
            used[prev_idx] = 1
            if layer_mask is not None:
                ht = tf.matmul(prev_s * layer_mask, w_skip[layer_id])
            else:
                ht = tf.matmul(prev_s, w_skip[layer_id])
            h, t = tf.split(ht, 2, axis=1)

            h = _select_function(h, func_idx)
            t = tf.sigmoid(t)
            s = prev_s + t * (h - prev_s)
            s.set_shape([batch_size, hidden_size])
            layers.append(s)
            start_idx += 2

        if self._params.average_loose_ends:
            layers = [l for l, u in zip(layers, used) if u == 0]
            next_s = tf.add_n(layers) / np.sum(1. - used)
        else:
            next_s = tf.add_n(layers[1:]) / tf.cast(num_layers,
                                                    dtype=tf.float32)
        return next_s, next_s
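# The fixed_arc above encodes the cell as flat (previous-layer index, activation id) pairs,
# where activation ids 0..3 map to tanh, relu, sigmoid, identity in _select_function.
# A decoding sketch for a hypothetical architecture (illustrative, not from the snippet):
fixed_arc = [0, 0, 0, 1, 1, 2, 2, 3]  # 4 layers
names = ['tanh', 'relu', 'sigmoid', 'identity']
for layer_id in range(len(fixed_arc) // 2):
    prev_idx, func_idx = fixed_arc[2 * layer_id], fixed_arc[2 * layer_id + 1]
    print('layer %d: input from layer %d, activation %s' %
          (layer_id + 1, prev_idx, names[func_idx]))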
Example #9
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias=0.1,
        attention_size=10,
    ):
        def lstm_cell():
            return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

        backward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
        forward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
        self.X = tf.placeholder(tf.float32, [None, None, size])
        self.Y = tf.placeholder(tf.float32, [None, output_size])
        drop_backward = tf.nn.rnn_cell.DropoutWrapper(
            backward_rnn_cells, output_keep_prob=forget_bias)
        drop_forward = tf.nn.rnn_cell.DropoutWrapper(
            forward_rnn_cells, output_keep_prob=forget_bias)
        self.backward_hidden_layer = tf.placeholder(tf.float32,
                                                    shape=(None, num_layers *
                                                           2 * size_layer))
        self.forward_hidden_layer = tf.placeholder(tf.float32,
                                                   shape=(None, num_layers *
                                                          2 * size_layer))
        outputs, last_state = tf.nn.bidirectional_dynamic_rnn(
            drop_forward,
            drop_backward,
            self.X,
            initial_state_fw=self.forward_hidden_layer,
            initial_state_bw=self.backward_hidden_layer,
            dtype=tf.float32,
        )
        outputs = list(outputs)
        attention_w = tf.get_variable('attention_v1', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(last_state[0][:, size_layer:], 1), attention_size)
        keys = tf.layers.dense(outputs[0], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        outputs[0] = tf.squeeze(
            tf.matmul(tf.transpose(outputs[0], [0, 2, 1]),
                      tf.expand_dims(align, 2)),
            2,
        )
        outputs[0] = tf.concat([outputs[0], last_state[0][:, size_layer:]], 1)

        attention_w = tf.get_variable('attention_v2', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(last_state[1][:, size_layer:], 1), attention_size)
        keys = tf.layers.dense(outputs[1], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        outputs[1] = tf.squeeze(
            tf.matmul(tf.transpose(outputs[1], [0, 2, 1]),
                      tf.expand_dims(align, 2)),
            2,
        )
        outputs[1] = tf.concat([outputs[1], last_state[1][:, size_layer:]], 1)

        with tf.variable_scope('decoder', reuse=False):
            self.backward_rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
            self.forward_rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell() for _ in range(num_layers)], state_is_tuple=False)
            backward_drop_dec = tf.nn.rnn_cell.DropoutWrapper(
                self.backward_rnn_cells_dec, output_keep_prob=forget_bias)
            forward_drop_dec = tf.nn.rnn_cell.DropoutWrapper(
                self.forward_rnn_cells_dec, output_keep_prob=forget_bias)
            self.outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
                forward_drop_dec,
                backward_drop_dec,
                self.X,
                initial_state_fw=outputs[0],
                initial_state_bw=outputs[1],
                dtype=tf.float32,
            )
        self.outputs = list(self.outputs)
        attention_w = tf.get_variable('attention_v3', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(self.last_state[0][:, size_layer:], 1),
            attention_size,
        )
        keys = tf.layers.dense(self.outputs[0], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        self.outputs[0] = tf.squeeze(
            tf.matmul(
                tf.transpose(self.outputs[0], [0, 2, 1]),
                tf.expand_dims(align, 2),
            ),
            2,
        )

        attention_w = tf.get_variable('attention_v4', [attention_size],
                                      tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(self.last_state[1][:, size_layer:], 1),
            attention_size,
        )
        keys = tf.layers.dense(self.outputs[1], attention_size)
        align = tf.reduce_sum(attention_w * tf.tanh(keys + query), [2])
        align = tf.nn.tanh(align)
        self.outputs[1] = tf.squeeze(
            tf.matmul(
                tf.transpose(self.outputs[1], [0, 2, 1]),
                tf.expand_dims(align, 2),
            ),
            2,
        )
        self.outputs = tf.concat(self.outputs, 1)
        self.logits = tf.layers.dense(self.outputs, output_size)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.cost)
Example #10
    def build(self, inputs):
        """Build the graph for this configuration.

    Args:
      inputs: A dict of inputs. For training, should contain 'wav'.

    Returns:
      A dict of outputs that includes the 'predictions',
      'init_ops', the 'push_ops', and the 'quantized_input'.
    """
        num_stages = 10
        num_layers = 30
        filter_length = 3
        width = 512
        skip_width = 256
        num_z = 16

        # Encode the source with 8-bit Mu-Law.
        x = inputs['wav']
        batch_size = self.batch_size
        x_quantized = utils.mu_law(x)
        x_scaled = tf.cast(x_quantized, tf.float32) / 128.0
        x_scaled = tf.expand_dims(x_scaled, 2)

        encoding = tf.placeholder(name='encoding',
                                  shape=[batch_size, num_z],
                                  dtype=tf.float32)
        en = tf.expand_dims(encoding, 1)

        init_ops, push_ops = [], []

        ###
        # The WaveNet Decoder.
        ###
        l = x_scaled
        l, inits, pushs = utils.causal_linear(x=l,
                                              n_inputs=1,
                                              n_outputs=width,
                                              name='startconv',
                                              rate=1,
                                              batch_size=batch_size,
                                              filter_length=filter_length)

        for init in inits:
            init_ops.append(init)
        for push in pushs:
            push_ops.append(push)

        # Set up skip connections.
        s = utils.linear(l, width, skip_width, name='skip_start')

        # Residual blocks with skip connections.
        for i in range(num_layers):
            dilation = 2**(i % num_stages)

            # dilated masked cnn
            d, inits, pushs = utils.causal_linear(x=l,
                                                  n_inputs=width,
                                                  n_outputs=width * 2,
                                                  name='dilatedconv_%d' %
                                                  (i + 1),
                                                  rate=dilation,
                                                  batch_size=batch_size,
                                                  filter_length=filter_length)

            for init in inits:
                init_ops.append(init)
            for push in pushs:
                push_ops.append(push)

            # local conditioning
            d += utils.linear(en,
                              num_z,
                              width * 2,
                              name='cond_map_%d' % (i + 1))

            # gated cnn
            assert d.get_shape().as_list()[2] % 2 == 0
            m = d.get_shape().as_list()[2] // 2
            d = tf.sigmoid(d[:, :, :m]) * tf.tanh(d[:, :, m:])

            # residuals
            l += utils.linear(d, width, width, name='res_%d' % (i + 1))

            # skips
            s += utils.linear(d, width, skip_width, name='skip_%d' % (i + 1))

        s = tf.nn.relu(s)
        s = (utils.linear(s, skip_width, skip_width, name='out1') +
             utils.linear(en, num_z, skip_width, name='cond_map_out1'))
        s = tf.nn.relu(s)

        ###
        # Compute the logits and get the loss.
        ###
        logits = utils.linear(s, skip_width, 256, name='logits')
        logits = tf.reshape(logits, [-1, 256])
        probs = tf.nn.softmax(logits, name='softmax')

        return {
            'init_ops': init_ops,
            'push_ops': push_ops,
            'predictions': probs,
            'encoding': encoding,
            'quantized_input': x_quantized,
        }
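# The dilation schedule implied by dilation = 2**(i % num_stages) with 30 layers and
# 10 stages cycles through 1, 2, 4, ..., 512 three times; a quick check (illustrative):
num_stages, num_layers = 10, 30
print([2 ** (i % num_stages) for i in range(num_layers)])
# [1, 2, 4, ..., 512, 1, 2, ..., 512, 1, 2, ..., 512]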
Example #11
    def __call__(self, x, state, timestep=0, scope=None):
        with tf.variable_scope(scope or type(self).__name__):
            total_h, total_c = tf.split(state, 2, 1)
            h = total_h[:, 0:self.num_units]
            c = total_c[:, 0:self.num_units]
            self.hyper_state = tf.concat(
                [total_h[:, self.num_units:], total_c[:, self.num_units:]], 1)

            batch_size = x.get_shape().as_list()[0]
            x_size = x.get_shape().as_list()[1]
            self._input_size = x_size

            w_init = None  # uniform

            h_init = lstm_ortho_initializer(1.0)

            w_xh = tf.get_variable('W_xh', [x_size, 4 * self.num_units],
                                   initializer=w_init)
            w_hh = tf.get_variable('W_hh',
                                   [self.num_units, 4 * self.num_units],
                                   initializer=h_init)
            bias = tf.get_variable('bias', [4 * self.num_units],
                                   initializer=tf.constant_initializer(0.0))

            # concatenate the input and hidden states for hyperlstm input
            hyper_input = tf.concat([x, h], 1)
            hyper_output, hyper_new_state = self.hyper_cell(
                hyper_input, self.hyper_state)
            self.hyper_output = hyper_output
            self.hyper_state = hyper_new_state

            xh = tf.matmul(x, w_xh)
            hh = tf.matmul(h, w_hh)

            # split Wxh contributions
            ix, jx, fx, ox = tf.split(xh, 4, 1)
            ix = self.hyper_norm(ix, 'hyper_ix', use_bias=False)
            jx = self.hyper_norm(jx, 'hyper_jx', use_bias=False)
            fx = self.hyper_norm(fx, 'hyper_fx', use_bias=False)
            ox = self.hyper_norm(ox, 'hyper_ox', use_bias=False)

            # split Whh contributions
            ih, jh, fh, oh = tf.split(hh, 4, 1)
            ih = self.hyper_norm(ih, 'hyper_ih', use_bias=True)
            jh = self.hyper_norm(jh, 'hyper_jh', use_bias=True)
            fh = self.hyper_norm(fh, 'hyper_fh', use_bias=True)
            oh = self.hyper_norm(oh, 'hyper_oh', use_bias=True)

            # split bias
            ib, jb, fb, ob = tf.split(bias, 4, 0)  # bias is to be broadcasted.

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i = ix + ih + ib
            j = jx + jh + jb
            f = fx + fh + fb
            o = ox + oh + ob

            if self.use_layer_norm:
                concat = tf.concat([i, j, f, o], 1)
                concat = layer_norm_all(concat, batch_size, 4, self.num_units,
                                        'ln_all')
                i, j, f, o = tf.split(concat, 4, 1)

            if self.use_recurrent_dropout:
                g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
            else:
                g = tf.tanh(j)

            new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
            new_h = tf.tanh(layer_norm(new_c, self.num_units,
                                       'ln_c')) * tf.sigmoid(o)

            hyper_h, hyper_c = tf.split(hyper_new_state, 2, 1)
            new_total_h = tf.concat([new_h, hyper_h], 1)
            new_total_c = tf.concat([new_c, hyper_c], 1)
            new_total_state = tf.concat([new_total_h, new_total_c], 1)
        return new_h, new_total_state
Example #12
    def build(self,
              inputs,
              is_training,
              rescale_inputs=True,
              include_decoder=True,
              use_reduce_mean_to_pool=False):
        """Build the graph for this configuration.

    Args:
      inputs: A dict of inputs. For training, should contain 'wav'.
      is_training: Whether we are training or not. Not used in this config.
      rescale_inputs: Whether to convert inputs to mu-law and back to unit
        scaling before passing through the model (loses gradients).
      include_decoder: bool, whether to include the decoder in the build().
      use_reduce_mean_to_pool: whether to use reduce_mean (instead of pool1d)
        for pooling.
    Returns:
      A dict of outputs that includes the 'predictions', 'loss', the 'encoding',
      the 'quantized_input', and whatever metrics we want to track for eval.
    """
        num_stages = 10
        num_layers = 30
        filter_length = 3
        width = 512
        skip_width = 256
        ae_num_stages = 10
        ae_num_layers = 30
        ae_filter_length = 3
        ae_width = 128

        # Encode the source with 8-bit Mu-Law.
        x = inputs['wav']
        x_quantized = utils.mu_law(x)
        x_scaled = tf.cast(x_quantized, tf.float32) / 128.0
        x_scaled = tf.expand_dims(x_scaled, 2)
        x = tf.expand_dims(x, 2)

        ###
        # The Non-Causal Temporal Encoder.
        ###
        en = masked.conv1d(x_scaled if rescale_inputs else x,
                           causal=False,
                           num_filters=ae_width,
                           filter_length=ae_filter_length,
                           name='ae_startconv',
                           is_training=is_training)

        for num_layer in range(ae_num_layers):
            dilation = 2**(num_layer % ae_num_stages)
            d = tf.nn.relu(en)
            d = masked.conv1d(d,
                              causal=False,
                              num_filters=ae_width,
                              filter_length=ae_filter_length,
                              dilation=dilation,
                              name='ae_dilatedconv_%d' % (num_layer + 1),
                              is_training=is_training)
            d = tf.nn.relu(d)
            en += masked.conv1d(d,
                                num_filters=ae_width,
                                filter_length=1,
                                name='ae_res_%d' % (num_layer + 1),
                                is_training=is_training)

        en = masked.conv1d(en,
                           num_filters=self.ae_bottleneck_width,
                           filter_length=1,
                           name='ae_bottleneck',
                           is_training=is_training)

        if use_reduce_mean_to_pool:
            # Depending on the accelerator used for training, masked.pool1d may
            # lead to out of memory error.
            # reduce_mean is equivalent to masked.pool1d when the stride is the same
            # as the window length (which is the case here).
            batch_size, unused_length, depth = en.shape.as_list()
            en = tf.reshape(en, [batch_size, -1, self.ae_hop_length, depth])
            en = tf.reduce_mean(en, axis=2)
        else:
            en = masked.pool1d(en,
                               self.ae_hop_length,
                               name='ae_pool',
                               mode='avg')
        encoding = en

        if not include_decoder:
            return {'encoding': encoding}

        ###
        # The WaveNet Decoder.
        ###
        l = masked.shift_right(x_scaled if rescale_inputs else x)
        l = masked.conv1d(l,
                          num_filters=width,
                          filter_length=filter_length,
                          name='startconv',
                          is_training=is_training)

        # Set up skip connections.
        s = masked.conv1d(l,
                          num_filters=skip_width,
                          filter_length=1,
                          name='skip_start',
                          is_training=is_training)

        # Residual blocks with skip connections.
        for i in range(num_layers):
            dilation = 2**(i % num_stages)
            d = masked.conv1d(l,
                              num_filters=2 * width,
                              filter_length=filter_length,
                              dilation=dilation,
                              name='dilatedconv_%d' % (i + 1),
                              is_training=is_training)
            d = self._condition(
                d,
                masked.conv1d(en,
                              num_filters=2 * width,
                              filter_length=1,
                              name='cond_map_%d' % (i + 1),
                              is_training=is_training))

            assert d.get_shape().as_list()[2] % 2 == 0
            m = d.get_shape().as_list()[2] // 2
            d_sigmoid = tf.sigmoid(d[:, :, :m])
            d_tanh = tf.tanh(d[:, :, m:])
            d = d_sigmoid * d_tanh

            l += masked.conv1d(d,
                               num_filters=width,
                               filter_length=1,
                               name='res_%d' % (i + 1),
                               is_training=is_training)
            s += masked.conv1d(d,
                               num_filters=skip_width,
                               filter_length=1,
                               name='skip_%d' % (i + 1),
                               is_training=is_training)

        s = tf.nn.relu(s)
        s = masked.conv1d(s,
                          num_filters=skip_width,
                          filter_length=1,
                          name='out1',
                          is_training=is_training)
        s = self._condition(
            s,
            masked.conv1d(en,
                          num_filters=skip_width,
                          filter_length=1,
                          name='cond_map_out1',
                          is_training=is_training))
        s = tf.nn.relu(s)

        ###
        # Compute the logits and get the loss.
        ###
        logits = masked.conv1d(s,
                               num_filters=256,
                               filter_length=1,
                               name='logits',
                               is_training=is_training)
        logits = tf.reshape(logits, [-1, 256])
        probs = tf.nn.softmax(logits, name='softmax')
        x_indices = tf.cast(tf.reshape(x_quantized, [-1]), tf.int32) + 128
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=x_indices, name='nll'),
                              0,
                              name='loss')

        return {
            'predictions': probs,
            'loss': loss,
            'eval': {
                'nll': loss
            },
            'quantized_input': x_quantized,
            'encoding': encoding,
        }
Example #13
def self_normalizing_tanh(x):
    return 1.592537419722831 * tf.tanh(x)
Example #14
def deep_reg(dimensions=[784, 512, 256, 64], ct=[0.1, 0.1, 0.1, 0.1], activation='leaky_relu'):
    """
    This function builds the architecture of a neural-network regression model.

    Parameters:
    -----------
    dimensions: list of ints
        List of number of neurons per layer.
    ct: list of floats
        List of values of the contamination level applied to the NN per layer.
    activation: str {'leaky_relu', 'relu_tanh', 'leaky_relu_l', 'tanh'}
        It defines the activation functions applied (default is 'leaky_relu')

            'leaky_relu':
                Leaky ReLU for all activation functions

            'relu_tanh':
                tanh on last hidden layer, Leaky ReLU for the rest

            'leaky_relu_l':
                Linear function on last hidden layer, Leaky ReLU for the rest

            'tanh':
                tanh for all activation functions

    Returns:
    --------
    dict
        Dictionary with:

            x: tf.Tensor
                Placeholder for the input training data

            theta: tf.Tensor
                Placeholder for the supervised-learning labels

            z: tf.Tensor
                NN output

            W: list of tf.Variable
                Weights of the NN, one per layer

            b: list of tf.Variable
                Biases, one per layer

            cost: tf.Tensor
                Scalar value of the cost function (mean squared error)
    """
    L = len(dimensions) - 1

    # INPUT DATA

    x = tf.placeholder(tf.float32, [None, dimensions[0]], name='x')
    theta = tf.placeholder(tf.float32, [None, dimensions[-1]], name='theta')
    pcost = tf.placeholder(tf.float32, [1], name='pcost')

    # NOISY ENCODER

    encoder = []
    all_h = []
    b_enc = []
    noise = tf.random_normal(shape=tf.shape(x), stddev=ct[0], dtype=tf.float32)
    current_input = x + noise
    all_h.append(current_input)

    for layer_i in range(1, L + 1):

        # Define the variables
        n_input = dimensions[layer_i - 1]
        n_output = dimensions[layer_i]

        low = -np.sqrt(6.0 / (n_input + n_output))
        high = np.sqrt(6.0 / (n_input + n_output))
        # Give each layer's variables distinct names ('Weights_0', 'Bias_0', ...).
        W = tf.Variable(
            tf.random_uniform([n_input, n_output], minval=low, maxval=high),
            name='Weights_{}'.format(layer_i - 1))
        be = tf.Variable(tf.zeros([n_output]),
                         name='Bias_{}'.format(layer_i - 1))

        b_enc.append(be)
        encoder.append(W)

        if layer_i == L+1:
            output = tf.matmul(current_input, W) + be
        else:
            if activation == 'leaky_relu' or (activation in ['relu_tanh', 'leaky_relu_l'] and layer_i < L):
                output = leaky_relu(tf.matmul(current_input, W) + be)
            elif activation == 'leaky_relu_l' and layer_i == L:
                output = tf.matmul(current_input, W) + be
            elif activation == 'tanh' or (activation == 'relu_tanh' and layer_i == L):
                output = tf.tanh(tf.matmul(current_input, W) + be)
            else:
                raise ValueError('Unknown activation: {}'.format(activation))
        noise = tf.random_normal(shape=tf.shape(output),
                                 stddev=ct[layer_i],
                                 dtype=tf.float32)
        current_input = output + noise

    z_out = output
    cost = tf.reduce_mean(tf.square(output - theta))
    return {'x': x, 'theta': theta, 'z': z_out, 'W': encoder, 'b': b_enc, 'cost': cost}
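# A minimal training sketch for deep_reg (illustrative; assumes graph-mode TF1, the
# leaky_relu helper used inside deep_reg, and hypothetical NumPy arrays X_train with
# shape [N, 784] and theta_train with shape [N, 64]):
import tensorflow as tf

net = deep_reg(dimensions=[784, 512, 256, 64], ct=[0.1, 0.1, 0.1, 0.1])
train_op = tf.train.AdamOptimizer(1e-3).minimize(net['cost'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, c = sess.run([train_op, net['cost']],
                    feed_dict={net['x']: X_train, net['theta']: theta_train})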
Example #15
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""
        hidden_size = self.params.controller_hidden_size
        num_layers = self.params.controller_num_layers

        arc_seq = []
        sample_log_probs = []
        sample_entropy = []
        all_h = [tf.zeros([1, hidden_size], dtype=tf.float32)]
        all_h_w = [tf.zeros([1, hidden_size], dtype=tf.float32)]

        # sampler ops
        inputs = self.g_emb
        prev_c = tf.zeros([1, hidden_size], dtype=tf.float32)
        prev_h = tf.zeros([1, hidden_size], dtype=tf.float32)

        for layer_id in range(1, num_layers + 1):
            next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            all_h.append(next_h)
            all_h_w.append(tf.matmul(next_h, self.attn_w_1))

            query = tf.matmul(next_h, self.attn_w_2)
            query = query + tf.concat(all_h_w[:-1], axis=0)
            query = tf.tanh(query)
            logits = tf.matmul(query, self.attn_v)
            logits = tf.reshape(logits, [1, layer_id])

            if self.params.controller_temperature:
                logits /= self.params.controller_temperature
            if self.params.controller_tanh_constant:
                logits = self.params.controller_tanh_constant * tf.tanh(logits)
            diff = tf.to_float(layer_id - tf.range(0, layer_id))**2
            logits -= tf.reshape(diff, [1, layer_id]) / 6.0

            skip_index = tf.multinomial(logits, 1)
            skip_index = tf.to_int32(skip_index)
            skip_index = tf.reshape(skip_index, [1])
            arc_seq.append(skip_index)

            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=skip_index)
            sample_log_probs.append(log_prob)

            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))

            inputs = tf.nn.embedding_lookup(tf.concat(all_h[:-1], axis=0),
                                            skip_index)
            inputs /= (0.1 + tf.to_float(layer_id - skip_index))

            next_c, next_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h, self.w_emb, transpose_b=True)
            if self.params.controller_temperature:
                logits /= self.params.controller_temperature
            if self.params.controller_tanh_constant:
                logits = self.params.controller_tanh_constant * tf.tanh(logits)
            func = tf.multinomial(logits, 1)
            func = tf.to_int32(func)
            func = tf.reshape(func, [1])
            arc_seq.append(func)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=func)
            sample_log_probs.append(log_prob)
            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))
            inputs = tf.nn.embedding_lookup(self.w_emb, func)

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
        self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

        sample_entropy = tf.concat(sample_entropy, axis=0)
        self.sample_entropy = tf.reduce_sum(sample_entropy)

        self.all_h = all_h
Example #16
    def create(self):

        tf.reset_default_graph()
        self.weight_bias_init()
        self.x_ph = tf.placeholder("float32",
                                   [1, self.batch.shape[0], self.batch.shape[1]])
        self.y_ph = tf.placeholder("float32", self.batch_targ.shape)
        self.seq = tf.constant(self.truncated, shape=[1])
        self.seq2 = tf.constant(self.truncated, shape=[1])
        self.dropout_ph = tf.placeholder("float32")
        self.fw_cell = self.cell_create('1')
        self.fw_cell2 = self.cell_create('2')
        if self.configuration == 'R':
            self.outputs, self.states = tf.nn.dynamic_rnn(
                self.fw_cell,
                self.x_ph,
                sequence_length=self.seq,
                dtype=tf.float32)
            if self.attention_number > 0:
                self.outputs_zero_padded = tf.pad(
                    self.outputs, [[0, 0], [self.attention_number, 0], [0, 0]])
                self.RNNout1 = tf.stack([
                    tf.reshape(
                        self.outputs_zero_padded[:, g:g + (self.attention_number + 1)],
                        [self.n_hidden[(len(self.n_hidden) - 1)] *
                         ((self.attention_number) + 1)])
                    for g in range(self.batch_size)
                ])
                self.presoft = tf.matmul(self.RNNout1, self.weights) + self.biases
            else:
                self.presoft = tf.matmul(self.outputs[0][0],
                                         self.weights) + self.biases
        elif self.configuration == 'B':
            self.bw_cell = self.cell_create('1')
            self.bw_cell2 = self.cell_create('2')
            with tf.variable_scope('1'):
                self.outputs, self.states = tf.nn.bidirectional_dynamic_rnn(
                    self.fw_cell,
                    self.bw_cell,
                    self.x_ph,
                    sequence_length=self.seq,
                    dtype=tf.float32)

            self.first_out = tf.concat((self.outputs[0], self.outputs[1]), 2)
            with tf.variable_scope('2'):
                self.outputs2, self.states2 = tf.nn.bidirectional_dynamic_rnn(
                    self.fw_cell2,
                    self.bw_cell2,
                    self.first_out,
                    sequence_length=self.seq2,
                    dtype=tf.float32)
            self.second_out = tf.concat((self.outputs2[0], self.outputs2[1]), 2)

            for i in range((self.attention_number * 2) + 1):
                self.attention_weight_init(i)

            self.zero_pad_second_out = tf.pad(
                tf.squeeze(self.second_out),
                [[self.attention_number, self.attention_number], [0, 0]])
            # self.attention_chunks.append(self.zero_pad_second_out[j:j+attention_number*2])
            self.attention_m = [
                tf.tanh(
                    tf.matmul(
                        tf.concat((self.zero_pad_second_out[j:j + self.batch_size],
                                   tf.squeeze(self.first_out)), 1),
                        self.attention_weights[j]))
                for j in range((self.attention_number * 2) + 1)
            ]
            self.attention_s = tf.nn.softmax(
                tf.stack([
                    tf.matmul(self.attention_m[i], self.sm_attention_weights[i])
                    for i in range(self.attention_number * 2 + 1)
                ]), 0)
            self.attention_z = tf.reduce_sum([
                self.attention_s[i] * self.zero_pad_second_out[i:self.batch_size + i]
                for i in range(self.attention_number * 2 + 1)
            ], 0)
            self.presoft = tf.matmul(self.attention_z, self.weights) + self.biases
        if self.output_act == 'softmax':
            self.pred = tf.nn.softmax(self.presoft)
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.presoft,
                                                        labels=self.y_ph))
        elif self.output_act == 'sigmoid':
            self.pred = tf.nn.sigmoid(self.presoft)
            self.cost = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=self.presoft,
                                                        labels=self.y_ph))
        if self.optimizer == 'GD':
            self.optimize = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate).minimize(self.cost)
        elif self.optimizer == 'Adam':
            self.optimize = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(self.cost)
        elif self.optimizer == 'RMS':
            self.optimize = tf.train.RMSPropOptimizer(
                learning_rate=self.learning_rate).minimize(self.cost)
        self.correct_pred = tf.equal(tf.argmax(self.pred, 1),
                                     tf.argmax(self.y_ph, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        self.saver_var = tf.train.Saver(tf.trainable_variables())
        if self.save_location == []:
            self.save_location = os.getcwd()