Example #1
def MultiDropoutGRUCell(size=DEFAULT_GRU_INTERNAL_SIZE, pkeep=DEFAULT_GRU_DROPOUT_KEEP_RATE, nlayers=DEFAULT_GRU_LAYERS):
    cell = DropoutGRUCell(size=size, pkeep=pkeep)
    cell = rnn.MultiRNNCell([cell] * nlayers, state_is_tuple=False)
    cell = rnn.DropoutWrapper(cell, output_keep_prob=pkeep)
    return cell
Example #2
    def __init__(self,
                 config,
                 batch,
                 lens_batch,
                 emotion_batch,
                 nrc_batch,
                 embed_matrix,
                 phase=Phase.Predict):
        batch_size = batch.shape[1]
        input_size = batch.shape[2]  # 31
        emotion_size = emotion_batch.shape[2]  # 6

        # The tweets. input_size is the (maximum) number of timesteps, i.e. maximum tweet length
        self._x = tf.placeholder(tf.int32, shape=[batch_size, input_size])

        # This tensor provides the actual number of timesteps for each
        # instance (words in a tweet).
        self._lens = tf.placeholder(tf.int32, shape=[batch_size])

        # The emotion distribution
        if phase != Phase.Predict:
            self._y = tf.placeholder(tf.int32,
                                     shape=[batch_size, emotion_size])

        # Embedding matrix
        self._embed = tf.placeholder(
            tf.float32, shape=[embed_matrix.shape[0], embed_matrix.shape[1]])
        word_embeddings = tf.nn.embedding_lookup(self._embed, self._x)

        # Lexicon
        self._lexicon = lexicon = tf.placeholder(
            tf.float32, shape=[batch_size, input_size, emotion_size])

        features = tf.concat([word_embeddings, lexicon], axis=2)

        cell = rnn.GRUCell(100)

        if phase == Phase.Train:
            regularized_cell = rnn.DropoutWrapper(
                cell,
                input_keep_prob=config.input_dropout,
                state_keep_prob=config.hidden_dropout)
            _, hidden = tf.nn.dynamic_rnn(regularized_cell,
                                          features,
                                          sequence_length=self._lens,
                                          dtype=tf.float32)
        else:
            _, hidden = tf.nn.dynamic_rnn(cell,
                                          features,
                                          sequence_length=self._lens,
                                          dtype=tf.float32)

        w = tf.get_variable("w", shape=[hidden.shape[1], emotion_size])
        b = tf.get_variable("b", shape=[emotion_size])
        logits = tf.matmul(hidden, w) + b

        if phase == Phase.Train or phase == Phase.Validation:
            losses = tf.nn.softmax_cross_entropy_with_logits(labels=self._y,
                                                             logits=logits)
            self._loss = loss = tf.reduce_sum(losses)

        if phase == Phase.Train:
            start_lr = 0.01
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(start_lr, global_step,
                                                       batch.shape[0], 0.90)
            self._train_op = tf.train.AdamOptimizer(learning_rate) \
                .minimize(loss, global_step=global_step)
            self._probs = probs = tf.nn.softmax(logits)

        if phase == Phase.Validation:
            # Emotions of the gold data
            self._gold = gold_emotions = tf.argmax(self._y, axis=1)

            # Predicted emotions
            self._pred = pred_emotions = tf.argmax(logits, axis=1)

            correct = tf.equal(gold_emotions, pred_emotions)
            correct = tf.cast(correct, tf.float32)
            self._accuracy = tf.reduce_mean(correct)
Example #3
        def get_a_cell(lstm_size, keep_prob):

            lstm = rnn.BasicLSTMCell(lstm_size)
            drop = rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
            return drop
Example #4
    def __init__(self, use_lstm=False, num_samples=512, forward_only=False):
        self.source_vocab_size = config.vocabulary_size
        self.target_vocab_size = config.vocabulary_size
        self.buckets = config.BUCKETS
        self.batch_size = config.FLAGS.batch_size
        self.learning_rate = tf.Variable(float(config.FLAGS.learning_rate),
                                         trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * config.FLAGS.learning_rate_decay_factor)
        self.lsmt_size = config.FLAGS.lstm_size
        self.num_layers = config.FLAGS.num_layers
        self.dropout = config.FLAGS.dropout
        self.max_gradient_norm = config.FLAGS.max_gradient_norm
        self.global_step = tf.Variable(0, trainable=False)
        self.model_dir = config.model_dir

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None

        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            w = tf.get_variable('proj_w',
                                [self.lsmt_size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b', [self.target_vocab_size])
            output_projection = (w, b)

            def sampled_loss(labels, logits):
                labels = tf.reshape(
                    labels,
                    [-1, 1])  # Add one dimension (nb of true classes, here 1)

                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                localWt = tf.cast(w_t, tf.float32)
                localB = tf.cast(b, tf.float32)
                localInputs = tf.cast(logits, tf.float32)

                return tf.cast(
                    tf.nn.sampled_softmax_loss(
                        localWt,  # Should have shape [num_classes, dim]
                        localB,
                        labels,
                        localInputs,
                        num_samples,  # The number of classes to randomly sample per batch
                        self.target_vocab_size),  # The number of classes
                    tf.float32)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_call = rnn.GRUCell(self.lsmt_size)
        if use_lstm:
            single_call = rnn.BasicLSTMCell(self.lsmt_size)

        if not forward_only:
            single_call = rnn.DropoutWrapper(single_call,
                                             input_keep_prob=1.0,
                                             output_keep_prob=self.dropout)

        cell = single_call
        if self.num_layers > 1:
            cell = rnn.MultiRNNCell([single_call] * self.num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            import copy
            temp_cell = copy.deepcopy(cell)
            return legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                temp_cell,
                num_encoder_symbols=self.source_vocab_size,
                num_decoder_symbols=self.target_vocab_size,
                embedding_size=self.lsmt_size,
                output_projection=output_projection,
                feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in range(self.buckets[-1][0]):
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))

        for i in range(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))

        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in range(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses = legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                self.buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)

            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in range(len(self.buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                self.buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        # Gradients and SGD update operation for training the model.
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            params = tf.trainable_variables()
            for b in range(len(self.buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))

        self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=3)
        self.mergedSummaries = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(config.graph_dir)
Example #5
X = tf.placeholder(tf.uint8, [None, None], name='X')  # [ BATCHSIZE, SEQLEN ]
Xo = tf.one_hot(X, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# expected outputs = same sequence shifted by 1 since we are trying to predict the next character
Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_')  # [ BATCHSIZE, SEQLEN ]
Yo_ = tf.one_hot(Y_, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# input state
Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE * NLAYERS],
                     name='Hin')  # [ BATCHSIZE, INTERNALSIZE * NLAYERS]

# using a NLAYERS=3 layers of GRU cells, unrolled SEQLEN=30 times
# dynamic_rnn infers SEQLEN from the size of the inputs Xo

# How to properly apply dropout in RNNs: see README.md
cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]
# "naive dropout" implementation
dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells]
multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
multicell = rnn.DropoutWrapper(
    multicell, output_keep_prob=pkeep)  # dropout for the softmax layer
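# A possible non-naive alternative (a sketch, not part of the original snippet):
# DropoutWrapper also supports variational recurrent dropout, which reuses the
# same dropout mask at every time step instead of resampling it:
#
#     varcells = [rnn.DropoutWrapper(cell,
#                                    state_keep_prob=pkeep,
#                                    variational_recurrent=True,
#                                    dtype=tf.float32)
#                 for cell in cells]
#     multicell = rnn.MultiRNNCell(varcells, state_is_tuple=False)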

Yr, H = tf.nn.dynamic_rnn(multicell, Xo, dtype=tf.float32, initial_state=Hin)
# Yr: [ BATCHSIZE, SEQLEN, INTERNALSIZE ]
# H:  [ BATCHSIZE, INTERNALSIZE*NLAYERS ] # this is the last state in the sequence

H = tf.identity(H, name='H')  # just to give it a name

# Softmax layer implementation:
# Flatten the first two dimension of the output [ BATCHSIZE, SEQLEN, ALPHASIZE ] => [ BATCHSIZE x SEQLEN, ALPHASIZE ]
# then apply softmax readout layer. This way, the weights and biases are shared across unrolled time steps.
# From the readout point of view, a value coming from a sequence time step or a minibatch item is the same thing.
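# A minimal sketch of the softmax readout described above (mirroring Example #6;
# it assumes `layers` refers to tf.contrib.layers, as in that example):
Yflat = tf.reshape(Yr, [-1, INTERNALSIZE])    # [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
Ylogits = layers.linear(Yflat, ALPHASIZE)     # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Yflat_ = tf.reshape(Yo_, [-1, ALPHASIZE])     # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Yflat_)
Yo = tf.nn.softmax(Ylogits, name='Yo')        # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Y = tf.argmax(Yo, 1)                          # [ BATCHSIZE x SEQLEN ]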
Example #6
# Create our TensorFlow Graph.
batchsize = tf.placeholder(tf.int32, name='batchsize')
lr = tf.placeholder(tf.float32, name='lr')
pkeep = tf.placeholder(tf.float32, name='pkeep')
X = tf.placeholder(tf.uint8, [None, None], name='X')  # Input vector
Xo = tf.one_hot(
    X, ALPHA_SIZE, 1.0,
    0.0)  # One Hots create vector size ALPHA_SIZE, all set 0 except character
Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_')  # Output tensor
Yo_ = tf.one_hot(Y_, ALPHA_SIZE, 1.0, 0.0)  # one-hot encode the output as well
Hin = tf.placeholder(tf.float32, [None, NUM_OF_GRUS * NUM_LAYERS],
                     name='Hin')  # Recurrent input states
cells = [rnn.GRUCell(NUM_OF_GRUS)
         for _ in range(NUM_LAYERS)]  # Create all our GRU cells per layer
dropcells = [
    rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells
]  # DropOut inside RNN
multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
multicell = rnn.DropoutWrapper(
    multicell, output_keep_prob=pkeep)  # DropOut for SoftMax layer
Yr, H = tf.nn.dynamic_rnn(
    multicell, Xo, dtype=tf.float32,
    initial_state=Hin)  # Unrolling through time happens here
H = tf.identity(H, name='H')  # Last state of sequence
Yflat = tf.reshape(Yr, [-1, NUM_OF_GRUS])
Ylogits = layers.linear(Yflat, ALPHA_SIZE)
Yflat_ = tf.reshape(Yo_, [-1, ALPHA_SIZE])
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Yflat_)
loss = tf.reshape(loss, [batchsize, -1])
Yo = tf.nn.softmax(Ylogits, name='Yo')
Y = tf.argmax(Yo, 1)
Example #7
    def gru_cell(self):
        with tf.name_scope('gru_cell'):
            cell = rnn.GRUCell(self.hidden_size,
                               reuse=tf.get_variable_scope().reuse)
        return rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
Example #8
def lstm_cell(hidden_size, keep_prob):
    cell = rnn.BasicLSTMCell(num_units=hidden_size, forget_bias=1.0)
    return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
Example #9
    def __init__(self, args, training=True):
        self.args = args

        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception('model type not supported')

        # build the hidden (RNN) layers
        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.nums_size)
            if training and (args.input_keep_prob < 1.0 or args.output_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(cell,
                                          input_keep_prob=args.input_keep_prob,
                                          output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        # build the input layer
        # placeholders

        self.input_data = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable(
                'softmax_w', shape=[args.nums_size, args.vocab_size])
            softmax_b = tf.get_variable('softmax_b', shape=[args.vocab_size])

        embeddings = tf.get_variable(
            'embedding', [args.vocab_size, args.nums_size])
        inputs = tf.nn.embedding_lookup(embeddings, self.input_data)
        # the embedding lookup output has shape [batch_size, seq_length, num_size]

        # apply dropout to the input layer during training

        if training and args.output_keep_prob < 1.0:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        # split along the seq_length dimension into seq_length tensors of shape [batch_size, 1, num_size]
        inputs = tf.split(inputs, args.seq_length, 1)
        # then squeeze each into [batch_size, num_size], giving a list of seq_length tensors
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # The loop function chains the num_steps RNN cells: it transforms the output
        # prev of h(t-1) and feeds it into h(t) as the next input.
        # The loop is only needed at inference time, i.e. when the model generates text
        # on its own and we want to inspect each step's output; during training the
        # loop function is not used.
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(
                tf.argmax(prev, 1))  # the index of a symbol in the vocabulary
            return tf.nn.embedding_lookup(embeddings, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell, loop_function=loop if not training else None, scope='rnnlm')
        # outputs is a list of num_steps tensors of shape [batch_size, num_size]

        # This part is essentially the same as the PTB model: reshape the outputs
        # into a [batch_size*seq_length, rnn_size] tensor, feed it through the
        # softmax layer, and train with sequence_loss_by_example.

        output = tf.reshape(tf.concat(outputs, 1), [-1, args.nums_size])
        self.logits = tf.matmul(output, softmax_w) + \
            softmax_b  # one row of vocab_size scores per position
        self.probs = tf.nn.softmax(self.logits)

        loss = legacy_seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(
            self.targets, [-1])], [tf.ones([args.batch_size * args.seq_length], dtype=tf.float32)])
        # self.logits has shape [batch_size*seq_length, vocab_size]
        # self.targets is reshaped to [batch_size*seq_length]
        # the weights are tf.ones([batch_size*seq_length])
        # the returned loss is a 1-D tensor of length batch_size*seq_length

        # tf.nn.seq2seq.sequence_loss_by_example(logits, targets, weights): the meaning of the three arguments:
        # logits is a 2-D tensor, say of shape a*b; targets is then a 1-D int32 tensor of length a whose
        # element values must not exceed b (so if b == 4, every element of targets must be < 4);
        # weights is a 1-D float32 tensor of length a holding the per-position weights.
        # (A toy shape sketch follows this snippet.)
        self.cost = cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
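        # A minimal toy shape sketch for sequence_loss_by_example, matching the
        # comments above (hypothetical sizes, not part of the original model):
        #
        #     toy_logits = tf.zeros([6, 4])                   # a = 6 positions, b = 4 classes
        #     toy_targets = tf.constant([0, 1, 2, 3, 0, 1])   # 1-D, length a, every value < b
        #     toy_weights = tf.ones([6], dtype=tf.float32)    # 1-D, length a
        #     toy_loss = legacy_seq2seq.sequence_loss_by_example(
        #         [toy_logits], [toy_targets], [toy_weights])  # -> 1-D loss of length a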
Example #10
Nbofbatch = trainsize // batchsize

learningi = tf.placeholder("float", [1])
learning_rate = 0.001 * pow(0.1, learningi[0])

x = tf.placeholder("float", [None, time_steps, n_input])
#input label placeholder
y = tf.placeholder("float", [None, n_classes])

#processing the input tensor from [batch_size,n_steps,n_input] to "time_steps" number of [batch_size,n_input] tensors
input = tf.unstack(x, time_steps, 1)

#defining the network
#lstm_layer=rnn.BasicLSTMCell(num_units,forget_bias=1)
lstm_layer = rnn.DropoutWrapper(rnn.BasicLSTMCell(num_units, forget_bias=1),
                                input_keep_prob=0.95,
                                output_keep_prob=0.95,
                                state_keep_prob=0.95)
outputs, _ = rnn.static_rnn(lstm_layer, input, dtype="float32")
# print(len(outputs))
outputs = tf.contrib.layers.fully_connected(
    outputs[-1],
    16,
    weights_regularizer=tf.contrib.layers.l2_regularizer(0.1),
    activation_fn=tf.nn.tanh)

prediction = tf.contrib.layers.fully_connected(outputs,
                                               1,
                                               activation_fn=tf.nn.tanh)
loss = tf.losses.mean_squared_error(y, prediction)
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
Example #11
def inference(inputs,
              batch_size,
              num_steps,
              vocab_size,
              embedding_size,
              hidden_size,
              keep_prob,
              num_layers,
              num_classes,
              is_training,
              use_lstm=True,
              use_bidirectional_rnn=True):
    with tf.device('/cpu:0'):
        embedding = tf.get_variable(
            'embedding', [vocab_size, embedding_size],
            initializer=tf.random_uniform_initializer(),
            dtype=tf.float32)
        inputs_embedding = tf.nn.embedding_lookup(embedding, inputs)
    if is_training and keep_prob < 1:
        inputs_embedding = tf.nn.dropout(inputs_embedding, keep_prob)
    inputs_embedding = tf.unstack(inputs_embedding, axis=1)

    initializer = tf.random_uniform_initializer(-0.1, 0.1)

    if use_lstm:
        forward_single_cell = rnn.LSTMCell(num_units=hidden_size,
                                           initializer=initializer,
                                           forget_bias=1.0)
    else:
        forward_single_cell = rnn.GRUCell(num_units=hidden_size)
    if is_training and keep_prob < 1.0:
        forward_single_cell = rnn.DropoutWrapper(forward_single_cell,
                                                 output_keep_prob=keep_prob)
    forward_rnn_cell = rnn.MultiRNNCell(
        [forward_single_cell for _ in range(num_layers)])

    if use_lstm:
        backward_single_cell = rnn.LSTMCell(num_units=hidden_size,
                                            initializer=initializer,
                                            forget_bias=1.0)
    else:
        backward_single_cell = rnn.GRUCell(num_units=hidden_size)
    if is_training and keep_prob < 1.0:
        backward_single_cell = rnn.DropoutWrapper(backward_single_cell,
                                                  output_keep_prob=keep_prob)
    backward_rnn_cell = rnn.MultiRNNCell(
        [backward_single_cell for _ in range(num_layers)])

    bi_flag = 1
    if use_bidirectional_rnn:
        bi_flag = 2
        outputs, forward_final_state, backward_final_state = rnn.static_bidirectional_rnn(
            forward_rnn_cell,
            backward_rnn_cell,
            inputs_embedding,
            dtype=tf.float32,
            sequence_length=[num_steps] * batch_size)
        final_state = (tf.concat(
            [forward_final_state[0], backward_final_state[0]], axis=2),
                       tf.concat(
                           [forward_final_state[1], backward_final_state[1]],
                           axis=2))
    else:
        outputs, final_state = rnn.static_rnn(forward_rnn_cell,
                                              inputs_embedding,
                                              dtype=tf.float32,
                                              sequence_length=[num_steps] *
                                              batch_size)

    output = tf.reshape(tf.concat(outputs, axis=1),
                        shape=[-1, bi_flag * hidden_size])

    weights = tf.get_variable('weights', [bi_flag * hidden_size, num_classes],
                              dtype=tf.float32)
    biases = tf.get_variable('biases', [num_classes], dtype=tf.float32)
    logits = tf.matmul(output, weights) + biases

    return logits, final_state
Example #12
    def get_rnn_cell(self):
        return rnn.DropoutWrapper(
            LayerNormBasicLSTMCell(self.hidden_dim),
            input_keep_prob=self.dropout_keep_prob_t,
            output_keep_prob=self.dropout_keep_prob_t)
Example #13
    def build_model(self):

        self.X = tf.placeholder(tf.int32, [self.batch_size], name='input')
        self.Y = tf.placeholder(tf.int32, [self.batch_size], name='output')
        self.state = [
            tf.placeholder(tf.float32, [self.batch_size, self.rnn_size],
                           name='rnn_state') for _ in range(self.layers)
        ]
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        with tf.variable_scope('gru_layer'):
            sigma = self.sigma if self.sigma != 0 else np.sqrt(
                6.0 / (self.n_risks + self.rnn_size))
            if self.init_as_normal:
                initializer = tf.random_normal_initializer(mean=0,
                                                           stddev=sigma)
            else:
                initializer = tf.random_uniform_initializer(minval=-sigma,
                                                            maxval=sigma)
            embedding = tf.get_variable('embedding',
                                        [self.n_risks, self.rnn_size],
                                        initializer=initializer)
            # definition of the forward (output) layer
            softmax_W = tf.get_variable('softmax_w',
                                        [self.n_risks, self.rnn_size],
                                        initializer=initializer)
            softmax_b = tf.get_variable(
                'softmax_b', [self.n_risks],
                initializer=tf.constant_initializer(0.0))

            # define the multi-layer plain GRU stack
            cell = rnn_cell.GRUCell(self.rnn_size, activation=self.hidden_act)
            drop_cell = rnn_cell.DropoutWrapper(
                cell, output_keep_prob=self.dropout_p_hidden)
            stacked_cell = rnn_cell.MultiRNNCell([drop_cell] * self.layers)

            # look up the vectors for X from the embedding layer
            inputs = tf.nn.embedding_lookup(embedding, self.X)
            output, state = stacked_cell(inputs, tuple(self.state))
            self.final_state = state
            self.output = output

        if self.is_training:
            '''
            Use other examples of the minibatch as negative samples.
            '''
            # push the outputs for embeddings that do not correspond to the input as low as possible
            sampled_W = tf.nn.embedding_lookup(softmax_W, self.Y)
            sampled_b = tf.nn.embedding_lookup(softmax_b, self.Y)
            logits = tf.matmul(output, sampled_W, transpose_b=True) + sampled_b
            self.yhat = self.final_activation(logits)
            self.cost = self.loss_function(self.yhat)
        else:
            logits = tf.matmul(output, softmax_W, transpose_b=True) + softmax_b
            self.yhat = self.final_activation(logits)
        self.logit = tf.matmul(output, softmax_W, transpose_b=True)
        self.sfw = softmax_W
        # print 'yhat',self.yhat.shape
        # print 'logits' ,logits.shape
        # print 'output',output.shape
        # print 'state ',self.final_state
        # print 'sampled_W',sampled_W.shape
        if not self.is_training:
            return

        # learning-rate schedule for gradient descent, to avoid saddle points and oscillation
        self.lr = tf.maximum(
            1e-5,
            tf.train.exponential_decay(self.learning_rate,
                                       self.global_step,
                                       self.decay_steps,
                                       self.decay,
                                       staircase=True))
        '''
        Try different optimizers.
        '''
        # optimizer = tf.train.AdagradOptimizer(self.lr)
        optimizer = tf.train.AdamOptimizer(self.lr)
        # optimizer = tf.train.AdadeltaOptimizer(self.lr)
        # optimizer = tf.train.RMSPropOptimizer(self.lr)

        tvars = tf.trainable_variables()

        # clip gradients by norm to avoid vanishing or exploding gradients
        gvs = optimizer.compute_gradients(self.cost, tvars)
        if self.grad_cap > 0:
            capped_gvs = [(tf.clip_by_norm(grad, self.grad_cap), var)
                          for grad, var in gvs]
        else:
            capped_gvs = gvs
        self.train_op = optimizer.apply_gradients(capped_gvs,
                                                  global_step=self.global_step)
Example #14
def DropoutGRUCell(size=DEFAULT_GRU_INTERNAL_SIZE, pkeep=DEFAULT_GRU_DROPOUT_KEEP_RATE):
    cell = rnn.GRUCell(size)
    cell = rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
    return cell
Example #15
def lstm_cell():
    cell = rnn.LSTMCell(hidden_size, reuse=tf.get_variable_scope().reuse)
    return rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
Example #16
out_weights = tf.Variable(tf.random_normal([num_units, n_classes]))
out_bias = tf.Variable(tf.random_normal([n_classes]))

# defining placeholders
# input image placeholder
x = tf.placeholder("float", [None, time_steps, n_input])
# input label placeholder
y = tf.placeholder("float", [None, n_classes])
keep_prob = tf.placeholder(dtype=tf.float32)

# processing the input tensor from [batch_size,n_steps,n_input] to "time_steps" number of [batch_size,n_input] tensors
input = tf.unstack(x, time_steps, 1)

# defining the network
lstm_layer = rnn.BasicLSTMCell(num_units, forget_bias=1)
dw_cell = rnn.DropoutWrapper(lstm_layer, output_keep_prob=keep_prob)
outputs, _ = rnn.static_rnn(dw_cell, input, dtype="float32")

# converting last output of dimension [batch_size,num_units] to [batch_size,n_classes] by out_weight multiplication
prediction = tf.matmul(outputs[-1], out_weights) + out_bias

# loss_function
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
# optimization
opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# model evaluation
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Example #17
File: rnn_lm.py  Project: agokrani/rnn-lm
    def make_cell():
        cell = rnn.BasicLSTMCell(self.num_hidden)
        cell = rnn.DropoutWrapper(cell,
                                  output_keep_prob=self.keep_prob)
        return cell
Example #18
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 lstm_hidden_size,
                 fc_hidden_size,
                 embedding_size,
                 embedding_type,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0,
                 pretrained_embedding=None):

        # Placeholders for input, output, dropout_prob and training_tag
        self.input_x_front = tf.placeholder(tf.int32, [None, sequence_length],
                                            name="input_x_front")
        self.input_x_behind = tf.placeholder(tf.int32, [None, sequence_length],
                                             name="input_x_behind")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

        def _linear(input_, output_size, scope="SimpleLinear"):
            """
            Linear map: output[k] = sum_i(Matrix[k, i] * args[i] ) + Bias[k]
            Args:
                input_: a tensor or a list of 2D, batch x n, Tensors.
                output_size: int, second dimension of W[i].
                scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
            Returns:
                A 2D Tensor with shape [batch x output_size] equal to
                sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
            Raises:
                ValueError: if one of the arguments has an unspecified or wrong shape.
            """

            shape = input_.get_shape().as_list()
            if len(shape) != 2:
                raise ValueError(
                    "Linear is expecting 2D arguments: {0}".format(str(shape)))
            if not shape[1]:
                raise ValueError(
                    "Linear expects shape[1] of arguments: {0}".format(
                        str(shape)))
            input_size = shape[1]

            # Now the computation.
            with tf.variable_scope(scope):
                W = tf.get_variable("W", [input_size, output_size],
                                    dtype=input_.dtype)
                b = tf.get_variable("b", [output_size], dtype=input_.dtype)

            return tf.nn.xw_plus_b(input_, W, b)

        def _highway_layer(input_,
                           size,
                           num_layers=1,
                           bias=-2.0,
                           f=tf.nn.relu):
            """
            Highway Network (cf. http://arxiv.org/abs/1505.00387).
            t = sigmoid(Wy + b)
            z = t * g(Wy + b) + (1 - t) * y
            where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
            """

            for idx in range(num_layers):
                g = f(
                    _linear(input_,
                            size,
                            scope=("highway_lin_{0}".format(idx))))
                t = tf.sigmoid(
                    _linear(
                        input_, size, scope=("highway_gate_{0}".format(idx))) +
                    bias)
                output = t * g + (1. - t) * input_
                input_ = output

            return output

        # Embedding Layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            # Use randomly generated word vectors by default.
            # They can also be replaced with word vectors trained on our own corpus.
            if pretrained_embedding is None:
                self.embedding = tf.Variable(tf.random_uniform(
                    [vocab_size, embedding_size],
                    minval=-1.0,
                    maxval=1.0,
                    dtype=tf.float32),
                                             trainable=True,
                                             name="embedding")
            else:
                if embedding_type == 0:
                    self.embedding = tf.constant(pretrained_embedding,
                                                 dtype=tf.float32,
                                                 name="embedding")
                if embedding_type == 1:
                    self.embedding = tf.Variable(pretrained_embedding,
                                                 trainable=True,
                                                 dtype=tf.float32,
                                                 name="embedding")
            self.embedded_sentence_front = tf.nn.embedding_lookup(
                self.embedding, self.input_x_front)
            self.embedded_sentence_behind = tf.nn.embedding_lookup(
                self.embedding, self.input_x_behind)

        # Add dropout
        with tf.name_scope("dropout-input"):
            self.embedded_sentence_front_drop = tf.nn.dropout(
                self.embedded_sentence_front, self.dropout_keep_prob)
            self.embedded_sentence_behind_drop = tf.nn.dropout(
                self.embedded_sentence_behind, self.dropout_keep_prob)

        # Bi-LSTM Layer
        with tf.name_scope("Bi-lstm"):
            lstm_fw_cell = rnn.BasicLSTMCell(
                lstm_hidden_size)  # forward direction cell
            lstm_bw_cell = rnn.BasicLSTMCell(
                lstm_hidden_size)  # backward direction cell
            if self.dropout_keep_prob is not None:
                lstm_fw_cell = rnn.DropoutWrapper(
                    lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
                lstm_bw_cell = rnn.DropoutWrapper(
                    lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

            # Creates a dynamic bidirectional recurrent neural network
            # shape of `outputs`: tuple -> (outputs_fw, outputs_bw)
            # shape of `outputs_fw`: [batch_size, sequence_length, lstm_hidden_size]

            # shape of `state`: tuple -> (outputs_state_fw, output_state_bw)
            # shape of `outputs_state_fw`: tuple -> (c, h) c: memory cell; h: hidden state
            outputs_front, state_front = tf.nn.bidirectional_dynamic_rnn(
                lstm_fw_cell,
                lstm_bw_cell,
                self.embedded_sentence_front_drop,
                dtype=tf.float32)
            outputs_behind, state_behind = tf.nn.bidirectional_dynamic_rnn(
                lstm_fw_cell,
                lstm_bw_cell,
                self.embedded_sentence_behind_drop,
                dtype=tf.float32)

            # Concat output
            # shape of `lstm_concat`: [batch_size, sequence_length, lstm_hidden_size * 2]
            self.lstm_concat_front = tf.concat(outputs_front, axis=2)
            self.lstm_concat_behind = tf.concat(outputs_behind, axis=2)

            # shape of `lstm_out`: [batch_size, sequence_length, lstm_hidden_size * 2, 1]
            self.lstm_out_front = tf.expand_dims(self.lstm_concat_front,
                                                 axis=-1)
            self.lstm_out_behind = tf.expand_dims(self.lstm_concat_behind,
                                                  axis=-1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs_front = []
        pooled_outputs_behind = []

        for filter_size in filter_sizes:
            with tf.name_scope("conv-filter{0}".format(filter_size)):
                # Convolution Layer
                filter_shape = [
                    filter_size, lstm_hidden_size * 2, 1, num_filters
                ]
                W = tf.Variable(tf.truncated_normal(shape=filter_shape,
                                                    stddev=0.1,
                                                    dtype=tf.float32),
                                name="W")
                b = tf.Variable(tf.constant(value=0.1,
                                            shape=[num_filters],
                                            dtype=tf.float32),
                                name="b")
                conv_front = tf.nn.conv2d(self.lstm_out_front,
                                          W,
                                          strides=[1, 1, 1, 1],
                                          padding="VALID",
                                          name="conv")

                conv_behind = tf.nn.conv2d(self.lstm_out_behind,
                                           W,
                                           strides=[1, 1, 1, 1],
                                           padding="VALID",
                                           name="conv_behind")

                conv_front = tf.nn.bias_add(conv_front, b)
                conv_behind = tf.nn.bias_add(conv_behind, b)

                # Batch Normalization Layer
                conv_bn_front = batch_norm(conv_front,
                                           is_training=self.is_training,
                                           trainable=True,
                                           updates_collections=None)
                conv_bn_behind = batch_norm(conv_behind,
                                            is_training=self.is_training,
                                            trainable=True,
                                            updates_collections=None)

                # Apply nonlinearity
                conv_out_front = tf.nn.relu(conv_bn_front, name="relu_front")
                conv_out_behind = tf.nn.relu(conv_bn_behind,
                                             name="relu_behind")

            with tf.name_scope("pool-filter{0}".format(filter_size)):
                # Maxpooling over the outputs
                avg_pooled_front = tf.nn.avg_pool(
                    conv_out_front,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")

                max_pooled_front = tf.nn.max_pool(
                    conv_out_front,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")

                avg_pooled_behind = tf.nn.avg_pool(
                    conv_out_behind,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")

                max_pooled_behind = tf.nn.max_pool(
                    conv_out_behind,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")

                # shape of `pooled_combine`: [batch_size, 1, 1, num_filters * 2]
                pooled_combine_front = tf.concat(
                    [avg_pooled_front, max_pooled_front], axis=3)
                pooled_combine_behind = tf.concat(
                    [avg_pooled_behind, max_pooled_behind], axis=3)

            pooled_outputs_front.append(pooled_combine_front)
            pooled_outputs_behind.append(pooled_combine_behind)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)

        # shape of `pool`: [batch_size, 1, 1, num_filters_total * 2]
        self.pool_front = tf.concat(pooled_outputs_front, axis=3)
        self.pool_behind = tf.concat(pooled_outputs_behind, axis=3)

        self.pool_flat_front = tf.reshape(self.pool_front,
                                          shape=[-1, num_filters_total * 2])
        self.pool_flat_behind = tf.reshape(self.pool_behind,
                                           shape=[-1, num_filters_total * 2])

        # shape of `pool_flat_combine`: [batch_size, num_filters_total * 2 * 2]
        self.pool_flat_combine = tf.concat(
            [self.pool_flat_front, self.pool_flat_behind], axis=1)

        # Fully Connected Layer
        with tf.name_scope("fc"):
            W = tf.Variable(tf.truncated_normal(
                shape=[num_filters_total * 2 * 2, fc_hidden_size],
                stddev=0.1,
                dtype=tf.float32),
                            name="W")
            b = tf.Variable(tf.constant(value=0.1,
                                        shape=[fc_hidden_size],
                                        dtype=tf.float32),
                            name="b")
            self.fc = tf.nn.xw_plus_b(self.pool_flat_combine, W, b)

            # Batch Normalization Layer
            self.fc_bn = batch_norm(self.fc,
                                    is_training=self.is_training,
                                    trainable=True,
                                    updates_collections=None)

            # Apply nonlinearity
            self.fc_out = tf.nn.relu(self.fc_bn, name="relu")

        # Highway Layer
        with tf.name_scope("highway"):
            self.highway = _highway_layer(self.fc_out,
                                          self.fc_out.get_shape()[1],
                                          num_layers=1,
                                          bias=0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Final scores and predictions
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal(
                shape=[fc_hidden_size, num_classes],
                stddev=0.1,
                dtype=tf.float32),
                            name="W")
            b = tf.Variable(tf.constant(value=0.1,
                                        shape=[num_classes],
                                        dtype=tf.float32),
                            name="b")
            self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
            self.softmax_scores = tf.nn.softmax(self.logits,
                                                name="softmax_scores")
            self.predictions = tf.argmax(self.logits, 1, name="predictions")
            self.topKPreds = tf.nn.top_k(self.softmax_scores,
                                         k=1,
                                         sorted=True,
                                         name="topKPreds")

        # Calculate mean cross-entropy loss, L2 loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.input_y, logits=self.logits)
            losses = tf.reduce_mean(losses, name="softmax_losses")
            l2_losses = tf.add_n([
                tf.nn.l2_loss(tf.cast(v, tf.float32))
                for v in tf.trainable_variables()
            ],
                                 name="l2_losses") * l2_reg_lambda
            self.loss = tf.add(losses, l2_losses, name="loss")

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")

        # TODO: Reconsider the metrics calculation
        # Number of correct predictions
        with tf.name_scope("num_correct"):
            correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.num_correct = tf.reduce_sum(tf.cast(correct, "float"),
                                             name="num_correct")

        # Calculate Fp
        with tf.name_scope("fp"):
            fp = tf.metrics.false_positives(labels=tf.argmax(self.input_y, 1),
                                            predictions=self.predictions)
            self.fp = tf.reduce_sum(tf.cast(fp, "float"), name="fp")

        # Calculate Fn
        with tf.name_scope("fn"):
            fn = tf.metrics.false_negatives(labels=tf.argmax(self.input_y, 1),
                                            predictions=self.predictions)
            self.fn = tf.reduce_sum(tf.cast(fn, "float"), name="fn")

        # Calculate Recall
        with tf.name_scope("recall"):
            self.recall = self.num_correct / (self.num_correct + self.fn)

        # Calculate Precision
        with tf.name_scope("precision"):
            self.precision = self.num_correct / (self.num_correct + self.fp)

        # Calculate F1
        with tf.name_scope("F1"):
            self.F1 = (2 * self.precision * self.recall) / (self.precision +
                                                            self.recall)

        # Calculate AUC
        with tf.name_scope("AUC"):
            self.AUC = tf.metrics.auc(self.softmax_scores,
                                      self.input_y,
                                      name="AUC")
Example #19
        #'cnnoutscale': tf.Variable(weights['cnnoutscale']),
        #'featbeta': tf.Variable(tf.zeros([4096])),
        #'featscale': tf.Variable(tf.ones([4096])),
        #'gbeta': tf.Variable(tf.zeros([1000])),
        #'gscale': tf.Variable(tf.ones([1000]))
    }

    # question-embedding
    #embed_ques_W = tf.Variable(tf.random_uniform([vocabulary_size, input_embedding_size], -0.08, 0.08), name='embed_ques_W')

    # encoder: RNN body
    lstm_1 = rnn_cell.LSTMCell(rnn_size,
                               input_embedding_size,
                               use_peepholes=True,
                               state_is_tuple=False)
    lstm_dropout_1 = rnn_cell.DropoutWrapper(lstm_1,
                                             output_keep_prob=1 - dropout_rate)
    lstm_2 = rnn_cell.LSTMCell(rnn_size,
                               rnn_size,
                               use_peepholes=True,
                               state_is_tuple=False)
    lstm_dropout_2 = rnn_cell.DropoutWrapper(lstm_2,
                                             output_keep_prob=1 - dropout_rate)
    stacked_lstm = rnn_cell.MultiRNNCell([lstm_dropout_1, lstm_dropout_2],
                                         state_is_tuple=False)

    image = tf.placeholder(tf.float32, [batch_size, 2048])
    question = tf.placeholder(tf.int32, [batch_size, max_words_q])
    answers_true = tf.placeholder(tf.int32, (batch_size, 1000))
    noise = tf.placeholder(tf.float32, [batch_size, 4096])

    #state = tf.zeros([batch_size, stacked_lstm.state_size])
Example #20
    def _ner_private(self, input_data, config, is_training):
        """Decode model for ner

        Args:
            encoder_units - these are the encoder units:
            [batch_size X encoder_size] with the one the pos prediction
            pos_prediction:
            must be the same size as the encoder_size

        returns:
            logits
        """
        # concatenate the encoder_units and the pos_prediction

        # pos_prediction = tf.reshape(pos_prediction,
        #                             [self.batch_size, self.num_steps, self.pos_embedding_size])
        print('Hello before encoder', input_data)
        encoder_units = tf.transpose(input_data, [1, 0, 2])
        # ner_inputs = tf.concat([pos_prediction, encoder_units], 2)
        ner_inputs = input_data

        with tf.variable_scope("ner_decoder"):
            # cell = rnn.BasicLSTMCell(config.ner_decoder_size, forget_bias=1.0, reuse=tf.get_variable_scope().reuse)
            #
            # if is_training and config.keep_prob < 1:
            #     cell = rnn.DropoutWrapper(
            #         cell, output_keep_prob=config.keep_prob)
            #
            # decoder_outputs, decoder_states = tf.nn.dynamic_rnn(cell,
            #                                                     ner_inputs,
            #                                                     dtype=tf.float32,
            #                                                     time_major=False,
            #                                                     scope="ner_rnn")
            lstm_cell_fw = tf.compat.v1.nn.rnn_cell.LSTMCell(
                config.ner_decoder_size / 2,
                reuse=tf.get_variable_scope().reuse,
                forget_bias=1.0)
            lstm_cell_bw = tf.compat.v1.nn.rnn_cell.LSTMCell(
                config.ner_decoder_size / 2,
                reuse=tf.get_variable_scope().reuse,
                forget_bias=1.0)
            if is_training and config.keep_prob < 1:
                lstm_cell_fw = rnn.DropoutWrapper(
                    lstm_cell_fw, output_keep_prob=config.keep_prob)
                lstm_cell_bw = rnn.DropoutWrapper(
                    lstm_cell_bw, output_keep_prob=config.keep_prob)
            decoder_outputs, decoder_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=lstm_cell_fw,
                cell_bw=lstm_cell_bw,
                dtype=tf.float32,
                inputs=ner_inputs,
                time_major=False,
                scope="ner_rnn")
            decoder_outputs = tf.concat(decoder_outputs, axis=2)

            output = tf.reshape(tf.concat(decoder_outputs, 1),
                                [-1, config.ner_decoder_size])

            softmax_w = tf.get_variable(
                "softmax_w", [config.ner_decoder_size, config.num_ner_tags])
            softmax_b = tf.get_variable("softmax_b", [config.num_ner_tags])
            logits = tf.matmul(output, softmax_w) + softmax_b

        return logits, decoder_states
Example #21
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        # choose different rnn cell
        if args.model == 'rnn':
            cell_fn = rnn.RNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.LSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # wrap the multi-layered rnn cells into one cell with dropout
        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            cells.append(cell)
        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        # input/target data (int32 since input is char-level)
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # softmax output layer, use softmax to classify
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        # transform input to embedding
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        # unstack the input to fit the rnn model
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # loop function for rnn_decoder, which takes the previous i-th cell's output and generates the (i+1)-th cell's input
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # rnn_decoder generates the outputs and final state. When we are not training the model, we use the loop function.
        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if not training else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        # output layer
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        # the loss is the log loss (cross-entropy), averaged over the batch and sequence
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()

        # calculate gradients
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)

        # apply the gradient updates to all the trainable variables
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Example #22
    def cell():
        # note: no keep probabilities are passed, so DropoutWrapper keeps its
        # defaults (keep_prob = 1.0) and applies no dropout here
        cell = rnn.DropoutWrapper(rnn.GRUCell(num_units=self.num_unit))
        return cell
Example #23
    def build_model(self):
        """
        Build the Bi-LSTM model architecture:
        1. embedding layer, 2. Bi-LSTM layer, 3. concat, 4. FC layer, 5. softmax
        """
        # 1. Embedding layer
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            self.embedding_matrix = tf.get_variable(shape=[self.vocabulary_size, self.embedding_dim],
                                                    initializer=tf.contrib.layers.xavier_initializer(uniform=True),
                                                    name='embedding_matrix',
                                                    trainable=self.embedding_trainable)

            # get embedding of words in the sentence, [None, sequence_length, embedding_dim]
            self.embedded_words = tf.nn.embedding_lookup(self.embedding_matrix, self.sentence)

        # 2. Bi-LSTM layer
        with tf.name_scope('bilstm_layer'):
            lstm_fw_cell = rnn.BasicLSTMCell(num_units=self.hidden_size)  # forward direction cell
            lstm_bw_cell = rnn.BasicLSTMCell(num_units=self.hidden_size)  # backward direction cell

            if self.lstm_drop_out:
                lstm_fw_cell = rnn.DropoutWrapper(cell=lstm_fw_cell,
                                                  output_keep_prob=self.dropout_keep_prob)
                lstm_bw_cell = rnn.DropoutWrapper(cell=lstm_bw_cell,
                                                  output_keep_prob=self.dropout_keep_prob)
            '''
            bidirectional_dynamic_rnn: input:  [batch_size, sequence_length, embedding_dim], max_time == sequence_length
                                       output: A tuple (outputs, output_states)
                                            outputs: A tuple (output_fw, output_bw)
                                               output_fw: [batch_size, max_time, cell_fw.output_size]
                                               output_bw: [batch_size, max_time, cell_bw.output_size]
            '''
            (fw_output, bw_output), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
                                                                        cell_bw=lstm_bw_cell,
                                                                        inputs=self.embedded_words,
                                                                        dtype=tf.float32)
            print("bidirectional_dynamic_rnn outputs: ", fw_output.get_shape(), bw_output.get_shape())

            # 3. concat, axis=2, concat cell_fw.output_size and cell_bw.output_size
            output_rnn = tf.concat((fw_output, bw_output), axis=2)  # [batch_size, sequence_length, hidden_size * 2]
            # mean-pool the outputs over time to get a fixed-size sentence representation
            self.output_rnn_last = tf.reduce_mean(output_rnn, axis=1)  # [batch_size, hidden_size * 2]
            print('pooled sentence representation:', self.output_rnn_last.get_shape())

        with tf.name_scope('readout'):
            # 4.linear classifier
            self.W_projection = tf.get_variable(shape=[self.hidden_size * 2, self.label_size],
                                                initializer=tf.contrib.layers.xavier_initializer(uniform=True),
                                                name='linear_W_projection')
            self.b_projection = tf.get_variable(shape=[self.label_size],
                                                name='linear_b_projection')
            self.logits = tf.add(tf.matmul(self.output_rnn_last, self.W_projection), self.b_projection, name='logits')
            self.prediction_probs = tf.nn.softmax(self.logits)

        with tf.name_scope("loss"):
            l2_loss = tf.constant(0.0)
            if self.embedding_trainable:
                l2_loss += tf.nn.l2_loss(self.embedding_matrix)

            # l2_losses = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * l2_lambda
            l2_loss += tf.nn.l2_loss(self.W_projection)
            l2_loss += tf.nn.l2_loss(self.b_projection)

            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
            self.loss = tf.reduce_mean(losses) + self.l2_reg_lambda * l2_loss

        with tf.name_scope("accuracy"):
            labels = tf.argmax(self.labels, 1)
            self.predictions = tf.argmax(self.logits, 1, name="predictions")
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.predictions, labels), "float"), name="accuracy")
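Since no `sequence_length` is passed to `bidirectional_dynamic_rnn` above, padded positions also contribute to the mean pooling. A sketch of a length-aware variant, assuming a `self.sequence_lengths` int placeholder of shape [batch_size] exists in the class:

# hypothetical length-aware pooling over the Bi-LSTM outputs
(fw_output, bw_output), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=self.embedded_words,
    sequence_length=self.sequence_lengths, dtype=tf.float32)
output_rnn = tf.concat((fw_output, bw_output), axis=2)
mask = tf.sequence_mask(self.sequence_lengths, maxlen=tf.shape(output_rnn)[1], dtype=tf.float32)
summed = tf.reduce_sum(output_rnn * tf.expand_dims(mask, -1), axis=1)
self.output_rnn_last = summed / tf.expand_dims(tf.reduce_sum(mask, axis=1), -1)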
示例#24
0
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # note: output_keep_prob is applied to the embedded inputs here; input_keep_prob is arguably the intended setting
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if not training else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
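A minimal sketch of greedy character sampling with the model above, assuming it was built with training=False (so batch_size and seq_length are 1) and that `vocab`/`chars` map characters to ids and back; `prime_char` and `num_chars` are assumptions:

import numpy as np

# hypothetical greedy sampling loop
state = sess.run(model.cell.zero_state(1, tf.float32))
x = np.array([[vocab[prime_char]]])
for _ in range(num_chars):
    feed = {model.input_data: x, model.initial_state: state}
    probs, state = sess.run([model.probs, model.final_state], feed)
    next_id = int(np.argmax(probs[0]))
    print(chars[next_id], end='')
    x = np.array([[next_id]])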
示例#25
0
    def build_wp(self, id):
        print('creating %s weak predictor network...' % id)
        with self.graph.as_default() as graph:
            with tf.variable_scope(id) as scope:

                self.rnn_cell[id] = rnn_cell = rnn.MultiRNNCell([
                    rnn.DropoutWrapper(rnn.LSTMCell(
                        self.config.LSTM_LAYER_SIZE),
                                       input_keep_prob=self.keep_prob)
                    for _ in range(self.config.LSTM_LAYERS)
                ])

                state = ()
                for s in rnn_cell.state_size:
                    c = tf.placeholder(tf.float32, [None, s.c])
                    h = tf.placeholder(tf.float32, [None, s.h])
                    state += (tf.contrib.rnn.LSTMStateTuple(c, h), )
                self.state[id] = state

                # Batch size x time steps x features.
                output, new_state = tf.nn.dynamic_rnn(
                    rnn_cell,
                    self.input,
                    initial_state=state,
                    sequence_length=self.seq_len)
                self.new_state[id] = new_state

                fc_layer_idx = 0
                for num_units in self.config.FC_LAYERS:
                    scope_name = 'fc_layer_%d' % fc_layer_idx
                    with tf.name_scope(scope_name):
                        output = tf.contrib.layers.fully_connected(
                            output,
                            num_units,
                            activation_fn=tf.nn.relu,
                            scope='dense_%d' % fc_layer_idx)
                        output = tf.nn.dropout(output, self.keep_prob)
                    fc_layer_idx += 1

                # final layer to make prediction
                with tf.name_scope('prediction_layer'):
                    self.returns[
                        id] = returns = tf.contrib.layers.fully_connected(
                            output, 1, activation_fn=None)

                with tf.name_scope('loss'):
                    diff = returns - tf.expand_dims(self.labels, 2)
                    self.sse[id] = sse = tf.reduce_sum(
                        tf.multiply(tf.square(diff),
                                    tf.expand_dims(self.mask, 2)))
                    self.cost[id] = cost = sse / tf.reduce_sum(self.mask)

                    self.optimizer[id] = optimizer = tf.train.AdamOptimizer()
                    self.vars[id] = vars = tf.trainable_variables(scope.name)
                    self.grads_and_vars[
                        id] = grads_and_vars = optimizer.compute_gradients(
                            cost, var_list=vars)
                    self.train[id] = optimizer.apply_gradients(grads_and_vars)

                self.saver[id] = tf.train.Saver(tf.trainable_variables(
                    scope.name),
                                                max_to_keep=None)
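A sketch of how the LSTMStateTuple placeholders built above might be fed with a zero state at the start of a sequence; `model`, `wp_id`, `cfg`, `batch_size`, `inputs`, `seq_lens`, and `sess` are assumptions about the surrounding code:

import numpy as np

# hypothetical zero-state feed for one weak predictor's state placeholders
feed = {model.input: inputs, model.seq_len: seq_lens, model.keep_prob: 1.0}
for c_ph, h_ph in model.state[wp_id]:
    feed[c_ph] = np.zeros((batch_size, cfg.LSTM_LAYER_SIZE), dtype=np.float32)
    feed[h_ph] = np.zeros((batch_size, cfg.LSTM_LAYER_SIZE), dtype=np.float32)
returns, new_state = sess.run([model.returns[wp_id], model.new_state[wp_id]], feed)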
示例#26
0
    def __init__(self, reversed_dict, article_max_len, summary_max_len, args, forward_only=False):
        self.vocabulary_size = len(reversed_dict)
        self.embedding_size = args.embedding_size
        self.num_hidden = args.num_hidden
        self.num_layers = args.num_layers
        self.learning_rate = args.learning_rate
        self.beam_width = args.beam_width
        if not forward_only:
            self.keep_prob = args.keep_prob
        else:
            self.keep_prob = 1.0
        self.cell = tf.nn.rnn_cell.BasicLSTMCell
        with tf.variable_scope("decoder/projection"):
            self.projection_layer = tf.layers.Dense(self.vocabulary_size, use_bias=False)

        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
        self.X = tf.placeholder(tf.int32, [None, article_max_len])
        self.X_len = tf.placeholder(tf.int32, [None])
        self.decoder_input = tf.placeholder(tf.int32, [None, summary_max_len])
        self.decoder_len = tf.placeholder(tf.int32, [None])
        self.decoder_target = tf.placeholder(tf.int32, [None, summary_max_len])
        self.global_step = tf.Variable(0, trainable=False)

        with tf.name_scope("embedding"):
            if not forward_only and args.glove:
                init_embeddings = tf.constant(get_init_embedding(reversed_dict, self.embedding_size), dtype=tf.float32)
            else:
                init_embeddings = tf.random_uniform([self.vocabulary_size, self.embedding_size], -1.0, 1.0)
            self.embeddings = tf.get_variable("embeddings", initializer=init_embeddings)
            self.encoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.X), perm=[1, 0, 2])
            self.decoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(self.embeddings, self.decoder_input), perm=[1, 0, 2])

        with tf.name_scope("encoder"):
            fw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            bw_cells = [self.cell(self.num_hidden) for _ in range(self.num_layers)]
            fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
            bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

            encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                fw_cells, bw_cells, self.encoder_emb_inp,
                sequence_length=self.X_len, time_major=True, dtype=tf.float32)
            self.encoder_output = tf.concat(encoder_outputs, 2)
            encoder_state_c = tf.concat((encoder_state_fw[0].c, encoder_state_bw[0].c), 1)
            encoder_state_h = tf.concat((encoder_state_fw[0].h, encoder_state_bw[0].h), 1)
            self.encoder_state = rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

        with tf.name_scope("decoder"), tf.variable_scope("decoder") as decoder_scope:
            decoder_cell = self.cell(self.num_hidden * 2)

            if not forward_only:
                attention_states = tf.transpose(self.encoder_output, [1, 0, 2])
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2, attention_states, memory_sequence_length=self.X_len, normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                                   attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size)
                initial_state = initial_state.clone(cell_state=self.encoder_state)
                helper = tf.contrib.seq2seq.TrainingHelper(self.decoder_emb_inp, self.decoder_len, time_major=True)
                decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, initial_state)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, scope=decoder_scope)
                self.decoder_output = outputs.rnn_output
                self.logits = tf.transpose(
                    self.projection_layer(self.decoder_output), perm=[1, 0, 2])
                self.logits_reshape = tf.concat(
                    [self.logits, tf.zeros([self.batch_size, summary_max_len - tf.shape(self.logits)[1], self.vocabulary_size])], axis=1)
            else:
                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
                    tf.transpose(self.encoder_output, perm=[1, 0, 2]), multiplier=self.beam_width)
                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(self.encoder_state, multiplier=self.beam_width)
                tiled_seq_len = tf.contrib.seq2seq.tile_batch(self.X_len, multiplier=self.beam_width)
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2, tiled_encoder_output, memory_sequence_length=tiled_seq_len, normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
                                                                   attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(dtype=tf.float32, batch_size=self.batch_size * self.beam_width)
                initial_state = initial_state.clone(cell_state=tiled_encoder_final_state)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embeddings,
                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),
                    end_token=tf.constant(3),
                    initial_state=initial_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection_layer
                )
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder, output_time_major=True, maximum_iterations=summary_max_len, scope=decoder_scope)
                self.prediction = tf.transpose(outputs.predicted_ids, perm=[1, 2, 0])

        with tf.name_scope("loss"):
            if not forward_only:
                crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits_reshape, labels=self.decoder_target)
                weights = tf.sequence_mask(self.decoder_len, summary_max_len, dtype=tf.float32)
                self.loss = tf.reduce_sum(crossent * weights / tf.to_float(self.batch_size))

                params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, params)
                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                self.update = optimizer.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step)
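A sketch of a beam-search inference call for the summarization model above (built with forward_only=True); `sess`, `valid_x`, and `valid_x_len` are assumed padded article batches from the surrounding script:

# hypothetical beam-search inference; prediction has shape [batch, beam_width, time]
prediction = sess.run(model.prediction, feed_dict={
    model.batch_size: len(valid_x),
    model.X: valid_x,
    model.X_len: valid_x_len,
})
best_beams = prediction[:, 0, :]  # token ids of the highest-scoring beam per example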
示例#27
0
    def __init__(self,
                 config: BiLSTMConfig,
                 is_training,
                 input_ids,
                 label_ids,
                 seq_length,
                 init_embedding=None):
        """Constructor for BertModel.

        Args:
          config: `BertConfig` instance.
          is_training: bool. rue for training model, false for eval model. Controls
            whether dropout will be applied.
          input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size].
          label_ids: (optional) int64 Tensor of shape [batch_size, seq_length].
          seq_length: (optional) int64 Tensor of shape [batch_size].
          init_embedding: (optional)

        Raises:
          ValueError: The config is invalid or one of the input tensor shapes
            is invalid.
        """
        self.input_ids = input_ids
        self.label_ids = label_ids
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        batch_size = input_shape[0]
        max_length = input_shape[1]
        window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            embedding = tf.Variable(init_embedding,
                                    dtype=tf.float32,
                                    name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(embedding, input_ids)
            feat_size = window_size
            x = tf.reshape(x,
                           [batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        with tf.variable_scope('rnn_cell'):
            if config.rnn_cell == 'lstm':
                fw_cell = tf.nn.rnn_cell.LSTMCell(config.hidden_size,
                                                  name='basic_lstm_cell')
                bw_cell = tf.nn.rnn_cell.LSTMCell(config.hidden_size,
                                                  name='basic_lstm_cell')
            else:
                fw_cell = rnn.GRUCell(config.hidden_size)
                bw_cell = rnn.GRUCell(config.hidden_size)
            fw_cell = rnn.DropoutWrapper(fw_cell,
                                         output_keep_prob=1.0 -
                                         config.hidden_dropout_prob)
            bw_cell = rnn.DropoutWrapper(bw_cell,
                                         output_keep_prob=1.0 -
                                         config.hidden_dropout_prob)
            fw_multi_cell = rnn.MultiRNNCell([fw_cell] *
                                             config.num_hidden_layers)
            bw_multi_cell = rnn.MultiRNNCell([bw_cell] *
                                             config.num_hidden_layers)

        with tf.variable_scope('rnn'):
            if config.bi_direction:
                (forward_output,
                 backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                     cell_fw=fw_multi_cell,
                     cell_bw=bw_multi_cell,
                     inputs=x,
                     sequence_length=seq_length,
                     dtype=tf.float32)
                output = tf.concat([forward_output, backword_output], axis=2)
            else:
                forward_output, _ = tf.nn.dynamic_rnn(
                    cell=fw_multi_cell,
                    inputs=x,
                    sequence_length=seq_length,
                    dtype=tf.float32)
                output = forward_output

        with tf.variable_scope('output'):
            logits = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            self.prediction = tf.argmax(logits, axis=-1)

        with tf.variable_scope('loss'):
            weight = tf.sequence_mask(seq_length, dtype=tf.float32)
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=logits,
                targets=self.label_ids,
                weights=weight,
                average_across_timesteps=True,
                average_across_batch=True)
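One caveat in the example above: `rnn.MultiRNNCell([fw_cell] * config.num_hidden_layers)` reuses the same wrapped cell object for every layer; the usual recommendation is to construct a fresh cell per layer. A sketch of that variant with the same configuration:

def make_cell():
    # one independently parameterized cell per layer
    if config.rnn_cell == 'lstm':
        base = tf.nn.rnn_cell.LSTMCell(config.hidden_size)
    else:
        base = rnn.GRUCell(config.hidden_size)
    return rnn.DropoutWrapper(base, output_keep_prob=1.0 - config.hidden_dropout_prob)

fw_multi_cell = rnn.MultiRNNCell([make_cell() for _ in range(config.num_hidden_layers)])
bw_multi_cell = rnn.MultiRNNCell([make_cell() for _ in range(config.num_hidden_layers)])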
示例#28
0
 def op_cell():
     return rnn.DropoutWrapper(cell(),
                               output_keep_prob=config.keep_prob)
示例#29
0
Epoch = tf.placeholder("float")
X = tf.placeholder("float", [None, num_steps, input_size])
y = tf.placeholder("float", [None, num_steps, output_size])

batch_size = tf.placeholder(tf.int32, [])
# keep_prob = tf.placeholder(tf.float32)

# single-layer LSTM cell
lstm_cell = rnn.BasicLSTMCell(num_units=hidden_size,
                              forget_bias=1.0,
                              state_is_tuple=True)

# apply dropout
if mode == 'dropout' or mode == 'cost_limited':
    lstm_cell = rnn.DropoutWrapper(cell=lstm_cell,
                                   input_keep_prob=1.0,
                                   output_keep_prob=keep_prob)

# Double-LSTM
# mlstm_cell = rnn.MultiRNNCell([lstm_cell] * 2, state_is_tuple=True)

# initialize the state
init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
# init_state = lstm_cell.zero_state(Batch_Size, dtype=tf.float32)

# outputs, state = tf.nn.dynamic_rnn(mlstm_cell, inputs=X, initial_state=init_state, time_major=False)
# h_state = outputs[:, -1, :]

outputs = []
state = init_state
with tf.variable_scope('RNN'):
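The listing above is cut off at the `with tf.variable_scope('RNN'):` line; a sketch of how the manual unrolling loop typically continues, assuming `num_steps` and the dropout-wrapped `lstm_cell` defined earlier in the snippet:

with tf.variable_scope('RNN'):
    for timestep in range(num_steps):
        if timestep > 0:
            tf.get_variable_scope().reuse_variables()
        # run one step of the (dropout-wrapped) LSTM on the current time slice
        cell_output, state = lstm_cell(X[:, timestep, :], state)
        outputs.append(cell_output)
h_state = outputs[-1]  # [batch_size, hidden_size], the last step's output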
示例#30
0
 def dec_cell(self, num):
     # return tfn.MultiRNNCell([tfn.GRUCell(num, name="dec_cell" + str(i)) for i in range(self.config['num'])])
     return tfn.DropoutWrapper(tfn.GRUCell(num, name="dec_cell"), state_keep_prob=self.drop)
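For recurrent-state dropout like the example above, a per-sequence (variational) mask is often preferred over a fresh mask at every time step. A sketch of the same decoder cell with variational recurrent dropout; the extra keyword arguments are standard `DropoutWrapper` options, and the method name is illustrative:

 def dec_cell_variational(self, num):
     # hypothetical variant: one dropout mask per sequence instead of per time step
     return tfn.DropoutWrapper(tfn.GRUCell(num, name="dec_cell"),
                               state_keep_prob=self.drop,
                               variational_recurrent=True,
                               dtype=tf.float32)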