Пример #1
0
    def __init__(self, rnn_size, num_layers, batch_size, seq_length, vocab_size, grad_clip,
                 infer=False):
        """
        Build the TensorFlow graph of a stacked-GRU language model.

        @param rnn_size: Units per recurrent layer; also the embedding width
        @param num_layers: Number of times the recurrent cell is stacked
        @param batch_size: Sequences per batch (overridden to 1 when infer is set)
        @param seq_length: Time steps per sequence (overridden to 1 when infer is set)
        @param vocab_size: Number of symbols; rows of the embedding, width of the logits
        @param grad_clip: Threshold handed to tf.clip_by_global_norm
        @param infer: When true, build a batch-1/length-1 graph and pass the
                      feed-previous function to the decoder as loop_function
        """
        #TODO: During training, (and when sampling), the input to the RNN should be
        #      the list of ingredients that goes with that recipe text.
        if infer:
            batch_size = 1
            seq_length = 1

        # GRU cell is used; BasicLSTMCell is the noted alternative.
        base_cell = rnn_cell.GRUCell(rnn_size)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * num_layers)
        self.cell = stacked_cell

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # Cut along axis 1 into seq_length pieces, then drop that axis
                # so the decoder receives one tensor per time step.
                step_slices = tf.split(1, seq_length, embedded)
                inputs = [tf.squeeze(step, [1]) for step in step_slices]

        def feed_previous(prev, _):
            # Project the previous output to vocabulary scores, take the argmax
            # (gradient stopped) and return the embedding of that symbol.
            prev_scores = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(prev_scores, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked_cell,
            loop_function=feed_previous if infer else None, scope="rnnlm")
        flat_output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b

        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        uniform_weights = tf.ones([batch_size * seq_length])
        per_example_loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [uniform_weights], vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.cost = tf.reduce_sum(per_example_loss) / batch_size / seq_length
        self.final_state = last_state

        # Learning rate lives in a non-trainable variable.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(clipped_grads, trainable))
Пример #2
0
    def build_graph(self, test):
        """
        Assemble the TensorFlow graph of a stacked-LSTM language model.

        Reads self.cell_size, self.num_layers, self.vocab_size, self.batch_size
        and self.seq_len, and publishes self.cell, self.inputs, self.targets,
        self.initial_state, self.ws, self.bs, self.embeddings, self.final_state,
        self.probs, self.loss, self.global_step, self.optimizer and
        self.train_op.

        When `test` is truthy, self.batch_size and self.seq_len are overwritten
        with 1 and the feed-previous function is passed to the decoder as its
        loop_function.
        """
        if test:
            self.batch_size = 1
            self.seq_len = 1

        base_cell = rnn_cell.BasicLSTMCell(self.cell_size)
        self.cell = rnn_cell.MultiRNNCell([base_cell] * self.num_layers)

        self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('lstm_vars'):
            # Output projection: cell output -> vocabulary scores.
            self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
            self.bs = tf.get_variable('bs', [self.vocab_size])

            with tf.device('/cpu:0'):
                self.embeddings = tf.get_variable(
                    'embeddings', [self.vocab_size, self.cell_size])
                embedded_inputs = tf.nn.embedding_lookup(self.embeddings,
                                                         self.inputs)
                # One tensor per time step: split along axis 1, drop that axis.
                step_inputs = [
                    tf.squeeze(piece, [1])
                    for piece in tf.split(1, self.seq_len, embedded_inputs)
                ]

        def feed_previous(prev, _):
            # Argmax symbol of the projected previous output (gradient
            # stopped), returned as its embedding.
            scores = tf.matmul(prev, self.ws) + self.bs
            best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
            return tf.nn.embedding_lookup(self.embeddings, best_symbol)

        loop_function = feed_previous if test else None
        step_outputs, self.final_state = seq2seq.rnn_decoder(
            step_inputs, self.initial_state, self.cell,
            loop_function=loop_function, scope='lstm_vars')
        flat_outputs = tf.reshape(tf.concat(1, step_outputs),
                                  [-1, self.cell_size])

        logits = tf.matmul(flat_outputs, self.ws) + self.bs
        self.probs = tf.nn.softmax(logits)

        flat_targets = tf.reshape(self.targets, [-1])
        uniform_weights = tf.ones([self.batch_size * self.seq_len])
        per_example_loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [uniform_weights], self.vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.loss = (tf.reduce_sum(per_example_loss)
                     / self.batch_size / self.seq_len)

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE,
                                                name='optimizer')
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=self.global_step, name='train_op')
Пример #3
0
    def build_graph(self):
        """
        Build the fast-weights RNN graph that predicts one symbol per sequence.

        Reads self.config (seq_length, batch_size, data_filename, rnn_size,
        input_length, vocab_size, embedding_size, grad_clip) and publishes
        self.reader, self.cell, the placeholders, the initial states,
        self.logits, self.probs, self.output, self.accuracy, self.cost,
        self.final_state, self.train_op and the summary ops.

        The batch dimension of both placeholders is left as None, so the loss
        weights are derived from the fed targets rather than from
        config.batch_size; a batch of any size can therefore be evaluated.
        """
        config = self.config
        self.reader = utils.DataReader(seq_len=config.seq_length,
                                       batch_size=config.batch_size,
                                       data_filename=config.data_filename)

        self.cell = LayerNormFastWeightsBasicRNNCell(num_units=config.rnn_size)

        self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
        self.targets = tf.placeholder(tf.int32, [None, 1])
        # Both zero states are sized from the run-time batch size of the targets.
        self.initial_state = self.cell.zero_state(
            tf.shape(self.targets)[0], tf.float32)
        self.initial_fast_weights = self.cell.zero_fast_weights(
            tf.shape(self.targets)[0], tf.float32)

        with tf.variable_scope("input_embedding"):
            embedding = tf.get_variable(
                "embedding", [config.vocab_size, config.embedding_size])
            inputs = tf.split(
                1, config.input_length,
                tf.nn.embedding_lookup(embedding, self.input_data))
            # One tensor per input position (axis 1 removed).
            inputs = [tf.squeeze(step_input, [1]) for step_input in inputs]

        with tf.variable_scope("send_to_rnn"):
            state = (self.initial_state, self.initial_fast_weights)
            output = None

            for step, step_input in enumerate(inputs):
                # The cell's variables are created on the first step and
                # reused on every later one.
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                output, state = self.cell(step_input, state)

        with tf.variable_scope("softmax"):
            softmax_w = tf.get_variable("softmax_w",
                                        [config.rnn_size, config.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
            # Only the output of the last step is classified.
            self.logits = tf.matmul(output, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(self.logits)
            self.output = tf.cast(
                tf.reshape(tf.arg_max(self.probs, 1), [-1, 1]), tf.int32)
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(self.output, self.targets), tf.float32))

        flat_targets = tf.reshape(self.targets, [-1])
        # Uniform weights shaped like the fed targets, so the loss follows the
        # actual batch size instead of the configured one.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets],
            [tf.ones_like(flat_targets, dtype=tf.float32)], config.vocab_size)

        self.cost = tf.reduce_mean(loss)
        self.final_state = state

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.grad_clip)
        # Adam is constructed without arguments, i.e. with its default settings.
        optimizer = tf.train.AdamOptimizer()
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.summary_accuracy = tf.scalar_summary('accuracy', self.accuracy)
        tf.scalar_summary('cost', self.cost)
        self.summary_all = tf.merge_all_summaries()
Пример #4
0
  def __init__(self, vocabularySize, config_param):
    """Build the graph of a multi-layer basic-RNN language model.

    vocabularySize: number of rows of the embedding table and width of the logits.
    config_param: object providing batch_size, sequence_size, embeddingSize,
      hidden_size and num_layers.

    Publishes the placeholders (_inputX, _inputTargetsY), multilayerRNN,
    _initial_state, _logits, _predictionSoftmax, _cost and _final_state.
    No optimizer is created here.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    self._inputX = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputTargetsY")

    # Embed the input ids; the embedding ops are pinned to the CPU device.
    with tf.device("/cpu:0"):
      embedding = tf.get_variable(
          "embedding", [self.vocabularySize, cfg.embeddingSize])
      embedded = tf.nn.embedding_lookup(embedding, self._inputX)
      # One tensor per time step: split along axis 1, then drop that axis.
      step_slices = tf.split(1, cfg.sequence_size, embedded)
      step_inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

    # Recurrent network: the same basic cell repeated num_layers times.
    base_cell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([base_cell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    # Run the RNN and project every step's output to vocabulary scores.
    step_outputs, last_state = rnn.rnn(
        self.multilayerRNN, step_inputs, initial_state=self._initial_state)
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Loss with uniform weights; the sum is divided by the batch size only.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    uniform_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    per_example_loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [uniform_weights], self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(per_example_loss), cfg.batch_size)

    self._final_state = last_state
Пример #5
0
    def __init__(self, args, infer=False):
        """
        Build an RNN language-model graph with an optional attention decoder.

        args must provide: batch_size, seq_length, rnncell ('rnn', 'gru' or
        'lstm'), rnn_size, num_layers, vocab_size, embedding_size, attention,
        grad_clip and log_dir. When `infer` is true, args.batch_size and
        args.seq_length are overwritten with 1 (args is mutated in place) and
        the feed-previous function is passed to the decoder as loop_function.

        Raises Exception when args.rnncell is not one of the supported names.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.rnncell == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.rnncell == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.rnncell == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("rnncell type not supported: {}".format(args.rnncell))

        cell = cell_fn(args.rnn_size)
        self.cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
        # Attention memory is fed from outside; its length and width are fixed here.
        self.attn_length = 5
        self.attn_size = 32
        self.attention_states = tf.placeholder(
            tf.float32, [args.batch_size, self.attn_length, self.attn_size])
        with tf.variable_scope('rnnlm'):
            softmax_w = build_weight([args.rnn_size, args.vocab_size], name='soft_w')
            softmax_b = build_weight([args.vocab_size], name='soft_b')
            word_embedding = build_weight([args.vocab_size, args.embedding_size],
                                          name='word_embedding')
            inputs_list = tf.split(1, args.seq_length,
                                   tf.nn.embedding_lookup(word_embedding, self.input_data))
            # One tensor per time step (axis 1 removed).
            inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]

        def loop(prev, _):
            # Argmax symbol of the projected previous output (gradient
            # stopped), returned as its embedding. The lookup must use the
            # same table the inputs were embedded with (`word_embedding`).
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(word_embedding, prev_symbol)

        loop_function = loop if infer else None
        if not args.attention:
            outputs, last_state = seq2seq.rnn_decoder(
                inputs_list, self.initial_state, self.cell,
                loop_function=loop_function, scope='rnnlm')
        else:
            outputs, last_state = attention_decoder(
                inputs_list, self.initial_state, self.attention_states, self.cell,
                loop_function=loop_function, scope='rnnlm')

        self.final_state = last_state
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            args.vocab_size)
        # Average loss for each word of each timestep.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        self.var_trainable_op = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, self.var_trainable_op), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, self.var_trainable_op))
        self.initial_op = tf.global_variables_initializer()
        # Log file name: log_dir followed by the current timestamp with spaces
        # and slashes removed, plus a '.txt' suffix.
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        self.logfile = args.log_dir + (timestamp + '.txt').replace(' ', '').replace('/', '')
        self.var_op = tf.global_variables()
        self.saver = tf.train.Saver(self.var_op, max_to_keep=4,
                                    keep_checkpoint_every_n_hours=1)
Пример #6
0
    def __init__(self, args, infer=False):
        """
        Build an RNN language-model graph with optional dropout.

        args must provide: batch_size, seq_length, model ('rnn', 'gru' or
        'lstm'), rnn_size, num_layers, vocab_size, dropout and grad_clip.
        args.dropout is treated as the probability of DROPPING a unit: both the
        cell wrapper and the input dropout keep units with probability
        1.0 - args.dropout. Dropout is applied only when training.

        When `infer` is true, args.batch_size and args.seq_length are
        overwritten with 1 (args is mutated in place), the feed-previous
        function is passed to the decoder, and no optimizer ops are created.

        Raises Exception when args.model is not one of the supported names.
        """
        self.args = args
        training = not infer
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        use_dropout = training and args.dropout > 0
        keep_prob = 1.0 - args.dropout

        cell = cell_fn(args.rnn_size)
        if use_dropout:
            cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                self.embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.nn.embedding_lookup(self.embedding, self.input_data)
                if use_dropout:
                    # tf.nn.dropout takes the KEEP probability, so the drop
                    # rate must be converted, matching the cell wrapper above.
                    inputs = tf.nn.dropout(inputs, keep_prob)
                inputs = tf.split(1, args.seq_length, inputs)
                # One tensor per time step (axis 1 removed).
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            # Argmax symbol of the projected previous output (gradient
            # stopped), returned as its embedding.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            args.vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        if not infer:
            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              args.grad_clip)
            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #7
0
    def build_model(self):
        """
        Build the training graph: image embedding + word inputs -> RNN -> loss.

        Reads self.images, self.fc7, self.encode_img_W, self.encode_img_b,
        self.seq_per_img, self.input_encoding_size, self.seq_length, self.Wemb,
        self.labels, self.masks, self.cell, self.embed_word_W,
        self.embed_word_b and self.opt.grad_clip. Publishes self.batch_size,
        self.logits, self.cost, self.final_state, self.lr, self.cnn_lr,
        self.train_op, self.cnn_train_op and self.summaries.
        """
        with tf.name_scope("batch_size"):
            self.batch_size = tf.shape(self.images)[0]

        with tf.variable_scope("rnnlm"):
            image_emb = tf.matmul(self.fc7, self.encode_img_W) + self.encode_img_b

            # Repeat every image embedding seq_per_img times and flatten the
            # result to one row per (image, sequence) pair.
            tiled_emb = tf.tile(tf.expand_dims(image_emb, 1),
                                [1, self.seq_per_img, 1])
            image_emb = tf.reshape(
                tiled_emb,
                [self.batch_size * self.seq_per_img, self.input_encoding_size])

            num_steps = self.seq_length + 1
            word_emb = tf.nn.embedding_lookup(self.Wemb,
                                              self.labels[:, :num_steps])
            word_steps = [tf.squeeze(piece, [1])
                          for piece in tf.split(1, num_steps, word_emb)]
            # The image embedding is the very first decoder input.
            rnn_inputs = [image_emb] + word_steps

            initial_state = self.cell.zero_state(
                self.batch_size * self.seq_per_img, tf.float32)

            outputs, last_state = seq2seq.rnn_decoder(
                rnn_inputs, initial_state, self.cell, loop_function=None)

            # The output of the image step is skipped; only word steps are scored.
            self.logits = [tf.matmul(step_out, self.embed_word_W) + self.embed_word_b
                           for step_out in outputs[1:]]

        with tf.variable_scope("loss"):
            # Targets and masks are the label/mask columns from index 1 on,
            # cut into one tensor per step.
            target_steps = [
                tf.squeeze(piece, [1])
                for piece in tf.split(1, self.seq_length + 1, self.labels[:, 1:])
            ]
            mask_steps = [
                tf.squeeze(piece, [1])
                for piece in tf.split(1, self.seq_length + 1, self.masks[:, 1:])
            ]
            loss = seq2seq.sequence_loss_by_example(
                self.logits, target_steps, mask_steps)
            self.cost = tf.reduce_mean(loss)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        self.cnn_lr = tf.Variable(0.0, trainable=False)

        clip = self.opt.grad_clip

        # Optimizer for the variables collected under the 'rnnlm' scope, with
        # element-wise clipping of each gradient to [-clip, clip].
        tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
        optimizer = tf.train.AdamOptimizer(self.lr, beta1=0.8)
        grads = optimizer.compute_gradients(self.cost, tvars)
        grads_cliped = [(tf.clip_by_value(grad, -clip, clip), var)
                        for grad, var in grads if grad is not None]
        self.train_op = optimizer.apply_gradients(grads_cliped)

        # Separate optimizer, with its own learning rate, for the variables
        # collected under the 'vgg16' scope.
        cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg16')
        cnn_optimizer = tf.train.AdamOptimizer(self.cnn_lr, beta1=0.8)
        cnn_grads = cnn_optimizer.compute_gradients(self.cost, cnn_tvars)
        cnn_grads_cliped = [(tf.clip_by_value(grad, -clip, clip), var)
                            for grad, var in cnn_grads if grad is not None]
        self.cnn_train_op = cnn_optimizer.apply_gradients(cnn_grads_cliped)

        tf.scalar_summary('training loss', self.cost)
        tf.scalar_summary('learning rate', self.lr)
        tf.scalar_summary('cnn learning rate', self.cnn_lr)
        self.summaries = tf.merge_all_summaries()
Пример #8
0
    def __init__(self, args, infer=False):
        """
        Build an RNN language-model graph that also reports empirical entropy.

        args must provide: batch_size, seq_length, model ('rnn', 'gru' or
        'lstm'), rnn_size, num_layers, vocab_size and grad_clip. When `infer`
        is true, args.batch_size and args.seq_length are overwritten with 1
        (args is mutated in place) and the feed-previous function is passed to
        the decoder as loop_function.

        Raises Exception when args.model is not one of the supported names.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # Only the LSTM cell takes `state_is_tuple`; passing it to the basic
        # RNN or GRU constructors is rejected as an unexpected keyword.
        if args.model == 'rnn':
            cell = rnn_cell.BasicRNNCell(args.rnn_size)
        elif args.model == 'gru':
            cell = rnn_cell.GRUCell(args.rnn_size)
        elif args.model == 'lstm':
            cell = rnn_cell.BasicLSTMCell(args.rnn_size, state_is_tuple=True)
        else:
            raise Exception("model type not supported: {}".format(args.model))

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(1, args.seq_length,
                                  tf.nn.embedding_lookup(embedding, self.input_data))
                # One tensor per time step (axis 1 removed).
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            # Argmax symbol of the projected previous output (gradient
            # stopped), returned as its embedding.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            args.vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Cost divided by ln(2). The summary tag spelling is kept unchanged
        # because it is the name under which the value is recorded.
        self.empirical_entropy = self.cost/np.log(2)
        tf.summary.scalar('Empircal_Entropy', self.empirical_entropy)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.merged_summaries = tf.summary.merge_all()
Пример #9
0
    def __init__(self, config):
        """
        Build a stacked-LSTM language-model graph trained by gradient descent.

        config must provide: batch_size, seq_length, hidden_size, vocab_size,
        num_layers and max_grad_norm. Publishes the placeholders, self.cells,
        self._initial_state, self._logits, self._predictionSoftmax,
        self._cost, self._final_state, self._learning_rate and self._train_op.
        """
        self.batch_size = config.batch_size
        self.seq_length = config.seq_length

        hidden = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(
            tf.int32, [self.batch_size, self.seq_length])
        self._targets = tf.placeholder(
            tf.int32, [self.batch_size, self.seq_length])

        # Recurrent network: the same LSTM cell repeated num_layers times.
        base_cell = rnn_cell.BasicLSTMCell(hidden, state_is_tuple=True)
        self.cells = rnn_cell.MultiRNNCell(
            [base_cell] * config.num_layers, state_is_tuple=True)
        self._initial_state = self.cells.zero_state(self.batch_size, tf.float32)

        # Embed the input ids; the embedding ops are pinned to the CPU device.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, hidden])
            embedded = tf.nn.embedding_lookup(embedding, self._input_data)
            # One tensor per time step: split along axis 1, then drop that axis.
            step_slices = tf.split(1, self.seq_length, embedded)
            step_inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

        # Output projection variables.
        softmax_w = tf.get_variable("softmax_w", [hidden, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])

        # Run the RNN and project every step's output to vocabulary scores.
        step_outputs, last_state = rnn.rnn(
            self.cells, step_inputs, initial_state=self._initial_state)
        flat_outputs = tf.reshape(tf.concat(1, step_outputs), [-1, hidden])
        self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
        self._predictionSoftmax = tf.nn.softmax(self._logits)

        # Loss with uniform weights; the sum is divided by the batch size only.
        flat_targets = tf.reshape(self._targets, [-1])
        uniform_weights = tf.ones([self.batch_size * self.seq_length])
        per_example_loss = seq2seq.sequence_loss_by_example(
            [self._logits], [flat_targets], [uniform_weights], vocab_size)
        self._cost = tf.div(tf.reduce_sum(per_example_loss), self.batch_size)
        self._final_state = last_state

        # Plain gradient descent on gradients clipped by global norm.
        self._learning_rate = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self._cost, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
        self._train_op = optimizer.apply_gradients(zip(clipped_grads, trainable))
Пример #10
0
    def __init__(self, args, infer=False):
        """
        Build a stacked-LSTM language-model graph.

        args must provide: batch_size, seq_length, rnn_size, num_layers,
        vocab_size and grad_clip. When `infer` is true (sampling),
        args.batch_size and args.seq_length are overwritten with 1 (args is
        mutated in place) and the feed-previous function is passed to the
        decoder as loop_function.
        """
        self.args = args
        if infer:
            # Sampling runs on a single sequence, one step at a time.
            args.batch_size = 1
            args.seq_length = 1

        # The same LSTM cell repeated num_layers times.
        base_cell = rnn_cell.BasicLSTMCell(args.rnn_size, state_is_tuple=True)
        stacked_cell = rnn_cell.MultiRNNCell([base_cell] * args.num_layers,
                                             state_is_tuple=True)
        self.cell = stacked_cell

        # Input/target placeholders and an all-zero starting state.
        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = stacked_cell.zero_state(args.batch_size,
                                                     tf.float32)

        with tf.variable_scope('rnnlm'):
            # Output projection weights and bias.
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One tensor per time step: split along axis 1, drop that axis.
                step_slices = tf.split(1, args.seq_length, embedded)
                inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

        def feed_previous(prev, _):
            # Argmax symbol of the projected previous output (gradient
            # stopped), returned as its embedding.
            scores = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        loop_function = feed_previous if infer else None
        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, stacked_cell,
            loop_function=loop_function, scope='rnnlm')
        flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        uniform_weights = tf.ones([args.batch_size * args.seq_length])
        per_example_loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [uniform_weights], args.vocab_size)
        # Summed loss divided by batch size and by sequence length.
        self.cost = (tf.reduce_sum(per_example_loss)
                     / args.batch_size / args.seq_length)
        self.final_state = last_state

        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, trainable)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(clipped_grads, trainable))
Пример #11
0
    def __init__(self, args, data, infer=False):
        """
        Build a stacked-LSTM language-model graph on top of tf.nn.dynamic_rnn.

        args must provide: batch_size, seq_length, state_size, num_layers and
        grad_clip; data must provide vocab_size. When `infer` is true,
        args.batch_size and args.seq_length are overwritten with 1 (args is
        mutated in place). The learning rate is a scalar placeholder (self.lr)
        that has to be fed when running self.train_op.
        """
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope('model'):
            base_cell = rnn_cell.BasicLSTMCell(args.state_size)
            self.cell = rnn_cell.MultiRNNCell([base_cell] * args.num_layers)
            self.initial_state = self.cell.zero_state(args.batch_size,
                                                      tf.float32)
            with tf.variable_scope('rnnlm'):
                # Output projection weights and bias.
                w = tf.get_variable('softmax_w',
                                    [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device("/cpu:0"):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            # Flatten the RNN outputs to rows of state_size before projecting.
            flat_output = tf.reshape(outputs, [-1, args.state_size])

            self.logits = tf.matmul(flat_output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            flat_targets = tf.reshape(self.target_data, [-1])
            uniform_weights = tf.ones_like(flat_targets, dtype=tf.float32)
            per_example_loss = seq2seq.sequence_loss_by_example(
                [self.logits], [flat_targets], [uniform_weights])
            # The summed loss is divided by the batch size only.
            self.cost = tf.reduce_sum(per_example_loss) / args.batch_size
            tf.scalar_summary('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.scalar_summary('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            trainable = tf.trainable_variables()
            raw_grads = tf.gradients(self.cost, trainable)
            # Histograms record the gradients before clipping.
            for grad in raw_grads:
                tf.histogram_summary(grad.name, grad)
            clipped_grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)

            self.train_op = optimizer.apply_gradients(
                zip(clipped_grads, trainable))
            self.merged_op = tf.merge_all_summaries()
Пример #12
0
    def __init__(self, args, infer=False):
        """
        Build an RNN language-model graph for variable-length sequences.

        args must provide: batch_size, model ('rnn', 'gru' or 'lstm'),
        rnn_size, num_layers, vocab_size and grad_clip. The time dimension of
        both placeholders is None, so sequences of any length can be fed. When
        `infer` is true, args.batch_size is overwritten with 1 (args is
        mutated in place).

        Raises Exception when args.model is not one of the supported names.
        """
        self.args = args
        if infer:
            args.batch_size = 1

        # Only the LSTM cell takes `state_is_tuple`; passing it to the basic
        # RNN or GRU constructors is rejected as an unexpected keyword.
        if args.model == 'rnn':
            cell = rnn_cell.BasicRNNCell(args.rnn_size)
        elif args.model == 'gru':
            cell = rnn_cell.GRUCell(args.rnn_size)
        elif args.model == 'lstm':
            cell = rnn_cell.BasicLSTMCell(args.rnn_size, state_is_tuple=False)
        else:
            raise Exception("model type not supported: {}".format(args.model))

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                                 state_is_tuple=False)

        # The length of the input sequence is variable.
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, None])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, None])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        outputs, last_state = tf.nn.dynamic_rnn(
            cell, inputs, initial_state=self.initial_state, scope='rnnlm')
        # Flatten the RNN outputs to rows of rnn_size before projecting.
        output = tf.reshape(outputs, [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        targets = tf.reshape(self.targets, [-1])
        # Uniform weights shaped like the flattened targets, so the loss
        # follows whatever sequence length is fed.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)], args.vocab_size)
        self.cost = tf.reduce_mean(loss)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
	def __init__(self, rnn_size, num_layers, batch_size, seq_length, vocabulary_size, gradient_clip, sample=False):
		"""Build a multi-layer LSTM language-model graph.

		Args:
			rnn_size: Number of units in each LSTM layer; also the embedding width.
			num_layers: Number of stacked LSTM layers.
			batch_size: Number of sequences per batch.
			seq_length: Number of time steps per sequence.
			vocabulary_size: Number of distinct symbols (size of the output layer).
			gradient_clip: Global-norm threshold used to clip the gradients.
			sample: When true, the decoder is given a loop function that feeds
				the argmax symbol of one step back in as the next step's input.
		"""
		lstm_cell = rnn_cell.BasicLSTMCell(num_units=rnn_size)

		# Create the RNN cell, constructed from multiple LSTM cells by duplicating the LSTM cell.
		self.cell = rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

		# The initial state is all zeros.
		self.initial_state = self.cell.zero_state(batch_size, tf.float32)

		# Placeholders for the input symbols and the symbols to predict.
		self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
		self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])

		# variable_scope lets variable names be reused under different scopes.
		with tf.variable_scope(VARIABLE_SCOPE):
			softmax_w = tf.get_variable("softmax_w", [rnn_size, vocabulary_size])
			softmax_b = tf.get_variable("softmax_b", [vocabulary_size])
			with tf.device("/cpu:0"):
				embedding = tf.get_variable("embedding", [vocabulary_size, rnn_size])
				# Split the embedded batch into one tensor per time step and
				# drop the length-1 time axis from each of them.
				inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
				inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

		def loop_function(prev, _):
			# Project the previous output to logits, pick the argmax symbol
			# (no gradient flows through the choice) and embed it.
			prev = tf.matmul(prev, softmax_w) + softmax_b
			stop_gradient = tf.stop_gradient(tf.argmax(prev, 1))
			return tf.nn.embedding_lookup(embedding, stop_gradient)

		outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop_function if sample else None, scope=VARIABLE_SCOPE)
		# Fixed: the original called the non-existent `tf.result_sentencehape`
		# (a garbled `tf.reshape`), which fails as soon as the graph is built.
		output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])

		# Calculate the logits and probabilities for the tensor.
		self.logits = tf.matmul(output, softmax_w) + softmax_b
		self.probabilities = tf.nn.softmax(self.logits)
		loss = seq2seq.sequence_loss_by_example([self.logits],
				[tf.reshape(self.targets, [-1])],
				[tf.ones([batch_size * seq_length])],
				vocabulary_size)
		# Average loss per predicted symbol.
		self.cost = tf.reduce_sum(loss) / batch_size / seq_length
		self.final_state = last_state
		# Non-trainable learning rate, initialised to 0.0; it has to be
		# assigned a real value before train_op has any effect.
		self.lr = tf.Variable(0.0, trainable=False)
		tvars = tf.trainable_variables()
		grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
				gradient_clip)
		optimizer = tf.train.AdamOptimizer(self.lr)
		self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #14
0
  def __init__(self, config=None, mode=None):
    """Build a single-layer LSTM graph that predicts one token per input window.

    Args:
      config: Configuration object. Despite the ``None`` default it is
        required: ``seq_length``, ``batch_size``, ``data_filename``,
        ``rnn_size``, ``input_length``, ``vocab_size`` and ``grad_clip`` are
        read from it.
      mode: Stored on the instance as ``self.mode``; not otherwise used here.
    """
    self.config = config
    self.mode = mode

    self.reader = utils.DataReader(seq_len=config.seq_length, batch_size=config.batch_size, data_filename=config.data_filename)

    self.cell = rnn_cell.BasicLSTMCell(config.rnn_size, state_is_tuple=True)

    # The batch dimension is left as None; the initial state is sized from the
    # batch that is actually fed.
    self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
    self.targets = tf.placeholder(tf.int32, [None, 1])
    self.initial_state = self.cell.zero_state(tf.shape(self.targets)[0], tf.float32)

    with tf.variable_scope("input_embedding"):
      embedding = tf.get_variable("embedding", [config.vocab_size, config.rnn_size])
      # One tensor per input position, with the length-1 position axis removed.
      inputs = tf.split(1, config.input_length, tf.nn.embedding_lookup(embedding, self.input_data))
      inputs = [tf.squeeze(step_input, [1]) for step_input in inputs]

    with tf.variable_scope("send_to_rnn"):
      state = self.initial_state
      output = None

      # Unroll the cell over the input positions, sharing its variables after
      # the first step. Only the output of the final step is used below.
      for i, step_input in enumerate(inputs):
        if i > 0:
          tf.get_variable_scope().reuse_variables()
        output, state = self.cell(step_input, state)

    with tf.variable_scope("softmax"):
      softmax_w = tf.get_variable("softmax_w", [config.rnn_size, config.vocab_size])
      softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
      self.logits = tf.matmul(output, softmax_w) + softmax_b
      self.probs = tf.nn.softmax(self.logits)

    # Size the uniform loss weights from the targets that are fed instead of
    # from config.batch_size, so the loss agrees with the dynamic batch
    # dimension of the placeholders.
    flat_targets = tf.reshape(self.targets, [-1])
    loss = seq2seq.sequence_loss_by_example([self.logits],
                                            [flat_targets],
                                            [tf.ones_like(flat_targets, dtype=tf.float32)],
                                            config.vocab_size)

    self.cost = tf.reduce_mean(loss)
    self.final_state = state

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      config.grad_clip)
    # Adam is constructed with its default learning rate.
    optimizer = tf.train.AdamOptimizer()
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #15
0
    def train_neural_network():
        """Train the network returned by ``neural_network()`` and save checkpoints.

        Runs 50 epochs over ``x_batches`` / ``y_batches`` with Adam, decaying
        the learning rate by a factor of 0.97 per epoch from 0.002 and clipping
        gradients to a global norm of 5. A checkpoint named 'thundermodule' is
        written every 7th epoch, and after epoch 3 one prediction is printed
        for the first training example.
        """
        logits, last_state, probs, cell, initial_state, _inputs = neural_network()
        targets = tf.reshape(output_targets, [-1])
        loss = seq2seq.sequence_loss_by_example([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
                                                datalen)
        cost = tf.reduce_mean(loss)
        learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        with tf.Session() as sess:
            sess.run(tf.initialize_all_variables())

            saver = tf.train.Saver(tf.all_variables())

            for epoch in range(50):
                # Exponentially decay the learning rate once per epoch.
                sess.run(tf.assign(learning_rate, 0.002 * (0.97 ** epoch)))
                for batch_idx in range(n_chunk):
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                            feed_dict={input_data: x_batches[batch_idx],
                                                       output_targets: y_batches[batch_idx]})
                    print(epoch, batch_idx, train_loss)

                if epoch % 7 == 0:
                    saver.save(sess, 'thundermodule', global_step=epoch)
                if epoch == 3:
                    # Sanity check: run one example through the network from a
                    # zero state and decode the resulting probabilities.
                    # NOTE(review): the reshape assumes the example holds
                    # exactly 5 tokens -- confirm against the data pipeline.
                    state_ = sess.run(cell.zero_state(1, tf.float32))

                    [probs_, state_] = sess.run([probs, last_state],
                                                feed_dict={input_data: np.array(x_batches[0][0]).reshape(1, 5),
                                                           initial_state: state_})
                    out = GetPredata(probs_, datas)
                    # Function-call form, matching the print above; the
                    # original statement form is a syntax error on Python 3.
                    print(out)
Пример #16
0
    def __init__(self, args, infer=False):
        """Build a multi-layer LSTM language-model graph with ``dynamic_rnn``.

        Args:
            args: Configuration object. Reads ``state_size``, ``rnn_size``,
                ``num_layers``, ``batch_size``, ``seq_length``, ``vocab_size``
                and ``grad_clip``.
            infer: If truthy, ``args.batch_size`` and ``args.seq_length`` are
                both overwritten with 1 (the caller's ``args`` is mutated).
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # NOTE(review): the cell is sized with args.state_size while the
        # embedding, the reshape and the softmax weights below all use
        # args.rnn_size -- presumably the two must be equal; confirm.
        cell = rnn_cell.BasicLSTMCell(args.state_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            # Output projection from the RNN output to vocabulary logits.
            w = tf.get_variable('softmax_w', [args.rnn_size, args.vocab_size])
            b = tf.get_variable('softmax_b', [args.vocab_size])

            with tf.device('/cpu:0'):
                embedding = tf.get_variable('embedding',
                                            [args.vocab_size, args.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        outputs, last_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
        # Flatten to rows of width rnn_size so one matmul yields all logits.
        output = tf.reshape(outputs, [-1, args.rnn_size])

        self.logits = tf.matmul(output, w) + b
        self.probs = tf.nn.softmax(self.logits)

        # Every target position is weighted equally in the loss.
        targets = tf.reshape(self.targets, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)])
        self.cost = tf.reduce_mean(loss)

        self.last_state = last_state

        # Non-trainable learning rate, initialised to 0.0; it has to be
        # assigned a real value before train_op has any effect.
        self.lr = tf.Variable(0.0, trainable=False)
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #17
0
 def __init__(self, args):
     """Build a multi-layer LSTM sequence-classifier graph.

     Args:
         args: Configuration object. Reads ``hidden``, ``num_layers``,
             ``batch_size``, ``seq_length``, ``seq_dim`` and ``num_classes``.
     """
     self.args = args

     # Output keep-probability used by the dropout wrapper; a non-trainable
     # variable that starts at 0.
     self.dropout = tf.Variable(initial_value=0,
                                dtype=tf.float32,
                                trainable=False)

     lstm_layer = rnn_cell.LSTMCell(args.hidden, state_is_tuple=True)
     stacked_cell = rnn_cell.MultiRNNCell([lstm_layer] * args.num_layers,
                                          state_is_tuple=True)
     # NOTE(review): the dropout-wrapped cell is only stored on the instance;
     # the decoder below is driven by the unwrapped stack -- confirm that
     # this is intended.
     self.cell = tf.nn.rnn_cell.DropoutWrapper(
         stacked_cell, output_keep_prob=self.dropout)

     input_shape = [args.batch_size, args.seq_length, args.seq_dim]
     self.input_data = tf.placeholder(tf.float32, input_shape)
     self.output_data = tf.placeholder(tf.int32, [args.batch_size])
     self.initial_state = stacked_cell.zero_state(args.batch_size, tf.float32)

     with tf.variable_scope('rnn_audio'):
         # Projection from the last RNN output to per-class logits.
         rnn_weights = tf.get_variable("rnn_weights",
                                       [args.hidden, args.num_classes])
         rnn_bias = tf.get_variable("rnn_bias", [args.num_classes])
         with tf.device("/cpu:0"):
             # One tensor per time step, with the length-1 time axis removed.
             per_step = tf.split(1, args.seq_length, self.input_data)
             inputs = [tf.squeeze(step, [1]) for step in per_step]

     outputs, last_state = seq2seq.rnn_decoder(
         inputs, self.initial_state, stacked_cell, scope='rnn_audio')

     # Classify from the output of the final time step only.
     final_output = outputs[-1]
     self.logits = tf.matmul(final_output, rnn_weights) + rnn_bias
     self.probabilities = tf.nn.softmax(self.logits)

     # Every example in the batch is weighted equally.
     uniform_weights = tf.ones([args.batch_size])
     loss = seq2seq.sequence_loss_by_example(
         [self.logits], [self.output_data], [uniform_weights],
         args.num_classes)
     self.cost = tf.reduce_mean(loss)
     self.final_state = last_state

     # Non-trainable learning rate, initialised to 0.0.
     self.lr = tf.Variable(0.0, trainable=False)
     train_vars = tf.trainable_variables()
     raw_grads = tf.gradients(self.cost, train_vars)
     # Gradients are clipped to a global norm of 5.
     grads, _ = tf.clip_by_global_norm(raw_grads, 5)
     optimizer = tf.train.AdamOptimizer(self.lr)
     self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
Пример #18
0
    def _init_tensorflow(self, infer: bool = False):
        """
        Deferred importing of tensorflow and initializing model for training
        or sampling.

        This is necessary for two reasons: first, the tensorflow graph is
        different for training and inference, so must be reset when switching
        between modes. Second, importing tensorflow takes a long time, so
        we only want to do it if we actually need to.

        Arguments:
            infer (bool): If True, initialize model for inference. If False,
                initialize model for training.

        Returns:
            module: imported TensorFlow module

        Raises:
            clgen.UserError: If ``self.model_type`` is not one of "lstm",
                "gru" or "rnn".
        """
        import tensorflow as tf
        from tensorflow.python.ops import rnn_cell
        from tensorflow.python.ops import seq2seq

        # Use self.tensorflow_state to mark whether or not model is configured
        # for training or inference. The attribute does not exist until the
        # first successful initialization.
        try:
            if self.tensorflow_state == infer:
                return tf
        except AttributeError:
            pass

        self.cell_fn = {
            "lstm": rnn_cell.BasicLSTMCell,
            "gru": rnn_cell.GRUCell,
            "rnn": rnn_cell.BasicRNNCell
        }.get(self.model_type, None)
        if self.cell_fn is None:
            raise clgen.UserError("Unrecognized model type")

        # reset the graph when switching between training and inference
        tf.reset_default_graph()

        # corpus info: inference runs one symbol at a time.
        batch_size = 1 if infer else self.corpus.batch_size
        seq_length = 1 if infer else self.corpus.seq_length
        vocab_size = self.corpus.vocab_size

        fs.mkdir(self.cache.path)

        # `state_is_tuple` is an LSTM-only constructor argument; forwarding it
        # to GRUCell or BasicRNNCell raises TypeError, so pass it only when an
        # LSTM layer is being built.
        if self.cell_fn is rnn_cell.BasicLSTMCell:
            cell = self.cell_fn(self.rnn_size, state_is_tuple=True)
        else:
            cell = self.cell_fn(self.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers,
                                                 state_is_tuple=True)
        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)

        scope_name = 'rnnlm'
        with tf.variable_scope(scope_name):
            # Output projection from the RNN output to vocabulary logits.
            softmax_w = tf.get_variable("softmax_w",
                                        [self.rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])

            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, self.rnn_size])
                # One tensor per time step, with the length-1 time axis
                # removed from each.
                inputs = tf.split(
                    1, seq_length,
                    tf.nn.embedding_lookup(embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            # Project the previous output to logits, pick the argmax symbol
            # (no gradient flows through the choice) and embed it as the next
            # input.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope=scope_name)
        # Flatten to rows of width rnn_size so one matmul yields all logits.
        output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * seq_length])], vocab_size)
        # Average loss per predicted symbol.
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        self.epoch = tf.Variable(0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # set model status
        self.tensorflow_state = infer

        return tf
Пример #19
0
    def __init__(self, args, infer=False):
        """Build the graph for a character-level RNN language model.

        Args:
            args: Configuration object. Reads ``model`` ('rnn', 'gru' or
                'lstm'), ``rnn_size``, ``num_layers``, ``batch_size``,
                ``seq_length``, ``vocab_size``, ``grad_clip`` and
                ``learning_rate``.
            infer: Set to true during sampling. ``args.batch_size`` and
                ``args.seq_length`` are then overwritten with 1 (the
                caller's ``args`` object is mutated).

        Raises:
            Exception: If ``args.model`` is not one of the supported types.
        """
        self.args = args
        if infer:
            # Worry about one character at a time during sampling; no batching or BPTT.
            args.batch_size = 1
            args.seq_length = 1

        # Set cell_fn to the type of network cell we're creating -- RNN, GRU or LSTM.
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Create one layer of rnn_size cells of the chosen basic type.
        # `state_is_tuple` is an LSTM-only constructor argument; forwarding it
        # to BasicRNNCell or GRUCell raises TypeError, so pass it only when an
        # LSTM layer is being built.
        if cell_fn is rnn_cell.BasicLSTMCell:
            cell = cell_fn(args.rnn_size, state_is_tuple=True)
        else:
            cell = cell_fn(args.rnn_size)

        # Stack num_layers of these cells. `[cell] * args.num_layers` repeats
        # the same cell object in a python list (just as [5, 6] * 3 gives
        # [5, 6, 5, 6, 5, 6]).
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers, state_is_tuple=True)

        # Two placeholders of 32-bit ints (NOT floats!), each of shape
        # batch_size x seq_length. input_data receives the input batches and
        # targets is what the predictions are compared against to compute the
        # loss. The shape is expected to match the batches built by the data
        # loader -- confirm against the batching code.
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

        # An all-zero state that can be swapped in for the network state at
        # any time to reset the network.
        # NOTE(review): with state_is_tuple=True the state is presumably a
        # nested tuple of per-layer states rather than one concatenated
        # [batch_size x state_size] tensor -- confirm for the TF version used.
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        # Scope our new variables to the scope identifier string "rnnlm".
        with tf.variable_scope('rnnlm'):
            # softmax_w is the weight matrix from the top layer of the model
            # (of size rnn_size) to the vocabulary output (of size vocab_size).
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            # softmax_b is a bias vector over the output characters (of size vocab_size).
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # TODO: Why specify CPU? Same as the TF translation tutorial, but the reason is unclear.
            with tf.device("/cpu:0"):
                # 'embedding' connects the character input to the base layer
                # of the RNN; its role is the conceptual inverse of softmax_w.
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                # The embedding lookup has shape batch_size x seq_length x rnn_size.
                # tf.split cuts it along dimension 1 into seq_length tensors,
                # each of shape batch_size x 1 x rnn_size.
                inputs = tf.split(1, args.seq_length, tf.nn.embedding_lookup(embedding, self.input_data))
                # Squeeze away the degenerate middle dimension, leaving a list
                # of seq_length tensors of shape batch_size x rnn_size.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Loop function handed to seq2seq.rnn_decoder only when sampling.
        # NOTE(review): an earlier reviewer observed that during inference
        # seq_length == 1, and the decoder only applies the loop function to
        # items after the first, so this function is effectively never used.
        # prev is a 2D tensor of shape [batch_size x cell.output_size];
        # the result is a 2D tensor of shape [batch_size x cell.input_size].
        def loop(prev, _):
            # Convert the previous top-layer output into character logits.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            # Pull the character with the greatest logit (argmax, no sampling).
            # NOTE(review): the open question from the original author is why
            # this argmaxes when actual sampling is done probabilistically,
            # and whether outputs then fail to match inputs during generation.
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            # Re-embed that symbol as the next step's input, and return that.
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # Build the decoder. This constructs the output and state nodes of the
        # network and is where the recurrent cell's trainable parameters are
        # defined. outputs is a list (of len seq_length, same as inputs) of
        # tensors of shape [batch_size x rnn_size]: raw top-layer values at
        # each time step that have NOT yet been projected into character
        # space.
        outputs, self.final_state = seq2seq.rnn_decoder(inputs,
                self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')

        # tf.concat joins the per-step outputs along dimension 1 into a single
        # tensor of shape [batch_size x (seq_length * rnn_size)]:
        #   [(batch 0, seq 0) (batch 0, seq 1) ... (batch 0, seq seq_len-1)]
        #   [(batch 1, seq 0) (batch 1, seq 1) ... (batch 1, seq seq_len-1)]
        #   ...
        # tf.reshape then turns it into [(batch_size * seq_length) x rnn_size]:
        #   [batch 0, seq 0]
        #   [batch 0, seq 1]
        #   ...
        #   [batch 0, seq seq_len-1]
        #   [batch 1, seq 0]
        #   ...
        #   [batch batch_size-1, seq seq_len-1]
        # The original author quotes this note from rnn_cell.py: in many cases
        # it may be more efficient to concatenate the whole sequence of
        # outputs in time, do the projection on this batch-concatenated
        # sequence, then split it if needed or feed it directly into a softmax.
        output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])

        # Project every row to vocabulary logits: output is
        # [(batch_size * seq_length) x rnn_size] and softmax_w is
        # [rnn_size x vocab_size], so the product is
        # [(batch_size * seq_length) x vocab_size]; softmax_b (one vocab-sized
        # vector) is then added to every row.
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        # Convert logits to probabilities; same shape as logits, i.e.
        # [1 x vocab_size] during sampling. Presumably this node is only
        # evaluated when sampling, not during training -- confirm in callers.
        self.probs = tf.nn.softmax(self.logits)

        # Per-position loss (a 1D float tensor of size batch_size * seq_length).
        # The targets are flattened from [batch_size x seq_length] to 1D in
        # the same row order as the logits:
        #   target character (batch 0, seq 0)
        #   target character (batch 0, seq 1)
        #   ...
        #   target character (batch 0, seq seq_len-1)
        #   target character (batch 1, seq 0)
        #   ...
        # Item i of the result is the log-perplexity of the ith logit
        # distribution against the ith target character.
        loss = seq2seq.sequence_loss_by_example([self.logits], # logits: 1-item list holding the 2D logits tensor
                [tf.reshape(self.targets, [-1])], # targets: 1-item list holding the flattened 1D int32 targets
                [tf.ones([args.batch_size * args.seq_length])], # weights: 1-item list of uniform 1D float weights
                args.vocab_size) # num_decoder_symbols: integer, number of decoder symbols (output classes)
        # Cost is the arithmetic mean of the loss tensor (the sum divided by
        # the total number of elements). This single-element float tensor is
        # what the optimizer seeks to minimize.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Create a summary for our cost.
        tf.scalar_summary("cost", self.cost)
        # Non-trainable variables tracking the learning rate and training progress.
        self.lr = tf.Variable(args.learning_rate, trainable=False)
        self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
        self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables() # tvars is a python list of all trainable TF Variable objects.

        # tf.gradients returns a list of tensors of length len(tvars) where
        # each tensor is sum(dy/dx); they are then clipped by global norm.
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr) # Use ADAM optimizer with the current learning rate.
        # zip pairs each clipped gradient with its variable, as
        # (gradient, variable) tuples. The training op nudges the variables
        # along the gradient with the given learning rate; this is the op a
        # training session should run.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.summary_op = tf.merge_all_summaries()
Пример #20
0
    def __init__(self, CellType, is_training, config):
        """Build an unrolled multi-layer RNN language-model graph.

        Args:
            CellType: Callable that builds one recurrent layer from a hidden
                size (it is called as ``CellType(size)``).
            is_training: When true, dropout is added (if
                ``config.keep_prob < 1``) and a gradient-descent training op
                is created; otherwise ``train_op`` is a no-op.
            config: Configuration object. Reads ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers``, ``init_scale`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps],
                                         name="input_data")
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps],
                                      name="targets")

        lstm_cell = CellType(size)
        if is_training and config.keep_prob < 1:
            # Dropout on each layer's output, only while training.
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        # initializer used for reusable variable initializer (see `get_variable`)
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size],
                                        initializer=initializer)
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        if is_training and config.keep_prob < 1:
            # Dropout on the embedded inputs as well.
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Unroll the cell over num_steps time steps, sharing its variables
        # after the first step. Only the final state is needed afterwards, so
        # the intermediate states are not collected.
        outputs = []
        state = self.initial_state

        with tf.variable_scope("RNN", initializer=initializer):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()

                inputs_slice = inputs[:, time_step, :]
                (cell_output, state) = cell(inputs_slice, state)

                outputs.append(cell_output)

        self.final_state = state

        # Flatten to rows of width `size` so one projection yields all logits.
        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        w = tf.get_variable("softmax_w", [size, vocab_size],
                            initializer=initializer)
        b = tf.get_variable("softmax_b", [vocab_size], initializer=initializer)

        logits = tf.nn.xw_plus_b(output, w, b)  # compute logits for loss
        targets = tf.reshape(self.targets, [-1])  # flatten the target outputs
        # uniform weights: every position counts equally in the loss
        weights = tf.ones([batch_size * num_steps])

        # computes loss and performs softmax on our fully-connected output layer
        loss = sequence_loss_by_example([logits], [targets], [weights],
                                        vocab_size)
        # The summed loss is divided by the batch size only (not by num_steps).
        self.cost = cost = tf.div(tf.reduce_sum(loss), batch_size, name="cost")

        if is_training:
            # learning rate is a non-trainable variable so it can be decayed
            self.lr = tf.Variable(1.0, trainable=False)

            # define training operation and clip the gradients
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                              config.max_grad_norm)
            optimizer = tf.train.GradientDescentOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                      name="train")
        else:
            # if this model isn't for training (i.e. testing/validation) then we don't do anything here
            self.train_op = tf.no_op()
    def __init__(self, config, mode='TRAIN', loaded_word_embed=None):
        """Builds the computing graph and initializes all variables.

        Args:
            config: Configuration object that contains all model configuration.
            mode: String from {'TRAIN', 'EVAL', 'INFER'}.
            loaded_word_embed: A numpy array of pretrained word embedding.

        Raises:
            ValueError: If mode is 'TRAIN' and config.opt_method is not one
                of 'SGD', 'AdaDelta' or 'Adam'.
        """
        # Initializes model parameters.
        self.batch_size = batch_size = config.batch_size
        self.vocab_size = vocab_size = config.vocab_size
        self.embed_dim = embed_dim = config.embed_dim
        self.hidden_dim = hidden_dim = config.hidden_dim
        self.num_hiddens = num_hiddens = config.num_hiddens
        self.num_modes = num_modes = config.num_modes
        self.mode_dim = mode_dim = config.mode_dim
        self.cmt_seq_len = cmt_seq_len = config.cmt_seq_len
        self.reply_seq_len = reply_seq_len = config.reply_seq_len
        # Objective weight for reply language modeling.
        self.alpha = alpha = config.alpha

        # Initializes placeholders for inputs.
        self.comment_inputs = []
        self.comment_weights = []
        self.reply_inputs = []
        self.reply_weights = []

        self._lr = tf.Variable(0.0, trainable=False)

        # One int32 token placeholder and one float32 weight placeholder per
        # time step, each of shape [batch_size].
        for i in xrange(cmt_seq_len):
            self.comment_inputs.append(
                tf.placeholder(tf.int32,
                               name='comment_input_{0}'.format(i),
                               shape=[batch_size]))
            self.comment_weights.append(
                tf.placeholder(tf.float32,
                               name='comment_weight_{0}'.format(i),
                               shape=[batch_size]))
        for i in xrange(reply_seq_len):
            self.reply_inputs.append(
                tf.placeholder(tf.int32,
                               name='reply_input_{0}'.format(i),
                               shape=[batch_size]))
            self.reply_weights.append(
                tf.placeholder(tf.float32,
                               name='reply_weight_{0}'.format(i),
                               shape=[batch_size]))

        # Only populated with graph tensors when mode == 'INFER'.
        self.comment_embeds = []
        self.mix_mode_embeds = []
        self.mode_probs = []
        self.init_reply_embed = []

        def build_lstm_stack():
            """Returns a num_hiddens-layer LSTM stack.

            Output dropout is applied to every layer only when training with
            config.keep_prob < 1.0.
            """
            use_dropout = mode == 'TRAIN' and config.keep_prob < 1.0
            layers = []
            for _ in xrange(num_hiddens):
                layer = tf.nn.rnn_cell.BasicLSTMCell(
                    hidden_dim, forget_bias=config.forget_bias,
                    state_is_tuple=True)
                if use_dropout:
                    layer = tf.nn.rnn_cell.DropoutWrapper(
                        layer, output_keep_prob=config.keep_prob)
                layers.append(layer)
            return tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

        # Initializes mode_rnn.
        mode_rnn = build_lstm_stack()

        # Defines the modes: every batch row indexes all num_modes modes.
        # list(...) keeps this a plain list of lists on Python 3 as well.
        batch_mode_inds = tf.constant([list(range(num_modes))
                                       for _ in range(batch_size)])

        # Defines the embeddings on CPU.
        with tf.device('/cpu:0'):
            mode_embedding = tf.get_variable(
                'mode_embedding',
                [num_modes, mode_dim], dtype=tf.float32)
            att_mode_vecs = tf.nn.embedding_lookup(
                mode_embedding, batch_mode_inds)
            att_states = tf.reshape(
                att_mode_vecs, [-1, num_modes, 1, mode_dim])

        att_mode_weight = tf.get_variable('att_mode_weight',
                                          [1, 1, mode_dim, hidden_dim])

        # A 1x1 convolution projects every mode vector from mode_dim to
        # hidden_dim once, outside the per-step attention.
        mode_feat = tf.nn.conv2d(
            att_states, att_mode_weight,
            [1, 1, 1, 1], 'SAME')
        att_v = tf.get_variable('att_v', [hidden_dim])

        def single_attention(query):
            """Attends over the mode embeddings with the given query.

            Returns:
                A pair (a_score, weighted_sum): the softmax scores reshaped
                to [-1, num_modes] and the score-weighted sum of the mode
                vectors reshaped to [-1, mode_dim].
            """
            with tf.variable_scope('attention_mlp'):
                y = linear(query, hidden_dim, True)
                y = tf.reshape(y, [-1, 1, 1, hidden_dim])
                s = tf.reduce_sum(att_v * tf.tanh(mode_feat + y), [2, 3])
                a_score = tf.nn.softmax(s)
                weighted_sum = tf.reduce_sum(
                    tf.reshape(a_score, [-1, num_modes, 1, 1]) * att_states,
                    [1, 2])
                a_score = tf.reshape(a_score, [-1, num_modes])
                weighted_sum = tf.reshape(weighted_sum, [-1, mode_dim])
            return a_score, weighted_sum

        with tf.device('/cpu:0'):
            if loaded_word_embed is None:
                embed_weight = tf.get_variable('word_embedding',
                                               [vocab_size, embed_dim])
            else:
                # Start the word embedding from the supplied pretrained array.
                pretrain_word_embed = tf.constant(loaded_word_embed)
                embed_weight = tf.get_variable('word_embedding',
                                               initializer=pretrain_word_embed)

        cmt_state = mode_rnn.zero_state(batch_size, tf.float32)
        c_prev, cell_output = cmt_state[0]

        # Computes the residual value of content and global modes.
        att_proj_weight = tf.get_variable('att_proj_weight',
                                          [mode_dim, hidden_dim])
        att_probs, attns = single_attention(cell_output)
        cell_output += tf.matmul(attns, att_proj_weight)
        # NOTE(review): the state is rebuilt as a one-element list from layer
        # 0 only, which presumably requires num_hiddens == 1 -- confirm.
        cmt_state = [tf.nn.rnn_cell.LSTMStateTuple(c_prev, cell_output)]

        mode_rnn_cell_output = []
        mode_probs = []
        lm_logits = []

        with tf.variable_scope('mode_rnn'):
            for i, cmt_in in enumerate(self.comment_inputs):
                # Share the variables created at the first time step.
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                cmt_embeds = tf.reshape(
                    tf.nn.embedding_lookup(embed_weight, cmt_in),
                    [batch_size, embed_dim])

                cell_output, cmt_state = mode_rnn(cmt_embeds, cmt_state)
                mode_rnn_cell_output.append(cell_output)
                att_probs, attns = single_attention(cell_output)

                # Add the projected attention result to the cell output and
                # carry the sum forward as the hidden part of the next state.
                c_prev, _ = cmt_state[0]
                cell_output += tf.matmul(attns, att_proj_weight)

                cmt_state = [tf.nn.rnn_cell.LSTMStateTuple(c_prev, cell_output)]

                with tf.variable_scope('attention_projection'):
                    attention_proj = linear(cell_output, vocab_size, True)

                lm_logits.append(attention_proj)
                mode_probs.append(att_probs)
                if mode == 'INFER':
                    self.mix_mode_embeds.append(attns)

        if mode == 'INFER':
            self.comment_embeds = mode_rnn_cell_output
            self.mode_probs = mode_probs

        # Stack the per-step comment outputs along a new axis 1 so the reply
        # decoder can attend over them.
        top_states = [tf.reshape(e, [-1, 1, mode_rnn.output_size])
                      for e in mode_rnn_cell_output]
        states_for_reply_rnn = tf.concat(1, top_states)

        # The last reply token is only ever a target, never an input.
        reply_embeds = [
            tf.reshape(tf.nn.embedding_lookup(embed_weight, reply_i),
                       [batch_size, embed_dim])
            for reply_i in self.reply_inputs[:-1]]

        # Initializes reply_rnn.
        reply_rnn = build_lstm_stack()

        reply_rnn_output, reply_rnn_final_state = attention_decoder(
            reply_embeds, cmt_state, states_for_reply_rnn, reply_rnn)

        if mode == 'INFER':
            self.init_reply_embed = reply_rnn_output[0]

        # Computes the language model loss for the comment: the logits at
        # step t are scored against the token at step t + 1.
        comment_targets = self.comment_inputs[1:]
        lm_loss = tf.reduce_sum(sequence_loss_by_example(
            lm_logits[:-1], comment_targets, self.comment_weights[1:]))

        gen_logits = []
        with tf.variable_scope('gen_logit_projection'):
            for i, rnn_out in enumerate(reply_rnn_output):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                logits = linear(rnn_out, vocab_size, True)
                gen_logits.append(logits)

        # Computes the language model loss for the reply.
        reply_targets = self.reply_inputs[1:]
        gen_loss = tf.reduce_sum(sequence_loss_by_example(
            gen_logits, reply_targets, self.reply_weights[1:]))

        loss = lm_loss + alpha * gen_loss
        self.total_loss = loss

        self.saver = tf.train.Saver(tf.all_variables())

        # Evaluation and inference graphs need no training op.
        if mode != 'TRAIN':
            return

        tvars = tf.trainable_variables()
        grads = tf.gradients(loss, tvars)

        if config.opt_method == 'SGD':
            optimizer = tf.train.GradientDescentOptimizer(self._lr)
        elif config.opt_method == 'AdaDelta':
            optimizer = tf.train.AdadeltaOptimizer(self._lr)
        elif config.opt_method == 'Adam':
            optimizer = tf.train.AdamOptimizer(self._lr)
        else:
            # The exception must be raised; otherwise the next line fails
            # with an unrelated NameError on `optimizer`.
            raise ValueError(
                'Unknown optimizer {}'.format(config.opt_method))
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #22
0
    def __init__(self, args, infer=False):
        """Assemble the word-level RNN language-model graph.

        Args:
            args: Hyper-parameter object. This method reads rnn_size,
                num_layers, batch_size, seq_length, vocab_size and grad_clip.
                When infer is True, batch_size and seq_length are
                overwritten with 1 on this very object.
            infer: True to build the sampling graph. One word is fed per
                step, and a loop function routes each step's prediction back
                in as the next input while blocking gradient updates.
        """
        self.args = args
        if infer:
            # Sampling consumes a single word at a time.
            args.batch_size = 1
            args.seq_length = 1

        # rnn_cell.GRUCell is another cell type worth experimenting with.
        layer = rnn_cell.BasicLSTMCell(args.rnn_size)
        stacked = rnn_cell.MultiRNNCell([layer] * args.num_layers)
        self.cell = stacked

        # During training the targets are the inputs shifted by one word
        # (see the example in text_loader_tests).
        token_grid = [args.batch_size, args.seq_length]
        self.input_data = tf.placeholder(tf.int32, token_grid)
        self.targets = tf.placeholder(tf.int32, token_grid)

        # The hidden state starts out as all zeroes.
        self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            # Output projection:
            # [batch_size, rnn_size] * [rnn_size, vocab_size] + [vocab_size]
            softmax_w = tf.get_variable(
                'softmax_w', [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable('softmax_b', args.vocab_size)

            # The embedding lookup is pinned to the CPU so the GPU is left
            # for the forward and backward pass of the LSTM.
            with tf.device("/cpu:0"):
                # Learned embedding matrix: one row (word vector) per word.
                # TODO: consider visualizing this embedding using TSNE after
                # training.
                embedding = tf.get_variable(
                    'embedding', [args.vocab_size, args.rnn_size])

                # [batch_size, seq_length, rnn_size]
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # The decoder takes a list with one 2-D tensor per time step,
                # so cut along axis 1 and drop the leftover unit axis.
                inputs = [
                    tf.squeeze(step, [1])
                    for step in tf.split(1, args.seq_length, embedded)
                ]

        def feed_previous(prev_output, _):
            """Embed the arg-max word of the previous step's output.

            The second argument is the step number, which is not used.
            """
            # [1, rnn_size] * [rnn_size, vocab_size] + [vocab_size]
            vocab_scores = tf.matmul(prev_output, softmax_w) + softmax_b
            best_id = tf.stop_gradient(tf.argmax(vocab_scores, 1))
            return tf.nn.embedding_lookup(embedding, best_id)

        # outputs: list with one [batch_size, rnn_size] tensor per time step.
        # The feedback loop is only installed when sampling.
        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            stacked,
            loop_function=feed_previous if infer else None,
            scope='rnnlm')

        # A list cannot be multiplied with softmax_w, so flatten the step
        # outputs to [batch_size * seq_length, rnn_size] first.
        flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], args.vocab_size)

        summed_loss = tf.reduce_sum(loss)
        self.cost = summed_loss / args.batch_size / args.seq_length
        self.final_state = last_state

        # The learning rate lives in a non-trainable variable so that it can
        # be lowered over the course of training without being backpropagated
        # into.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable), args.grad_clip)
        self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
            zip(clipped, trainable))
Пример #23
0
def build_model(words_size, embedding_size, oseq_len, source_len,
                simplified_len, encoder_hidden, decoder_hidden, lstm_layer,
                batch_size, source_nfilters, source_width, is_train):
    """Wire the convolutional encoder, RNN encoder and RNN decoder together.

    The tensors and parameters come from construct_data(); the three
    sub-networks are built by encoder_conv(), encoder_rnn() and
    decoder_rnn(). The cost is the mean of the per-example sequence loss of
    the decoder outputs against the labels.

    Returns:
        A dict with the keys 'outputs', 'embedding', 'cost', 'sample_rate',
        'words_prediction', 'source', 'defendant', 'defendant_length',
        'label', 'decoder_inputs', 'loss_weights' and 'keep_prob'.
    """
    data = construct_data(words_size=words_size,
                          embedding_size=embedding_size,
                          source_len=source_len,
                          simplified_len=simplified_len,
                          oseq_len=oseq_len,
                          encoder_hidden=encoder_hidden,
                          decoder_hidden=decoder_hidden,
                          source_nfilters=source_nfilters,
                          source_width=source_width)

    # Pull out everything the sub-networks need, in one place.
    (embedding, conv_args, weigth_generation, bias_generation, source,
     defendant, defendant_length, label, decoder_inputs, loss_weights,
     keep_prob, sample_rate) = [
         data[key] for key in (
             'embedding', 'conv_args', 'weigth_generation',
             'bias_generation', 'source', 'defendant', 'defendant_length',
             'label', 'decoder_inputs', 'loss_weights', 'keep_prob',
             'sample_rate')
     ]

    conv_encoder = encoder_conv(source=source,
                                defendant=defendant,
                                conv_args=conv_args,
                                keep_prob=keep_prob,
                                embedding=embedding,
                                is_train=is_train)

    rnn_encoder, encoder_states = encoder_rnn(
        defendant=defendant,
        defendant_length=defendant_length,
        encoder_hidden=encoder_hidden,
        keep_prob=keep_prob,
        batch_size=batch_size,
        embedding=embedding)

    # The decoder's returned state is not used by this function.
    rnn_decoder, _ = decoder_rnn(
        conv_encoder=conv_encoder,
        rnn_encoder=rnn_encoder,
        encoder_states=encoder_states,
        defendant=defendant,
        decoder_inputs=decoder_inputs,
        decoder_hidden=decoder_hidden,
        weigth_generation=weigth_generation,
        bias_generation=bias_generation,
        n_steps=oseq_len,
        batch_size=batch_size,
        lstm_layer=lstm_layer,
        keep_prob=keep_prob,
        embedding=embedding,
        sample_rate=sample_rate,
        is_train=is_train)

    # Labels and loss weights are transposed and unpacked into per-step
    # lists to line up with the list of decoder outputs.
    step_targets = tf.unpack(tf.transpose(label, [1, 0]))
    weight_tensor = tf.convert_to_tensor(loss_weights, dtype=tf.float32)
    step_weights = tf.unpack(tf.transpose(weight_tensor, [1, 0]))
    per_example_loss = seq2seq.sequence_loss_by_example(
        logits=rnn_decoder,
        targets=step_targets,
        weights=step_weights)
    cost = tf.reduce_mean(per_example_loss)

    # Pack the step outputs, swap the first two axes, and take the arg-max
    # over the last axis.
    stacked_outputs = tf.transpose(tf.pack(rnn_decoder), [1, 0, 2])
    words_prediction = tf.argmax(stacked_outputs, 2)

    print('build model ')

    model = {
        'outputs': rnn_decoder,
        'embedding': embedding,
        'cost': cost,
        'sample_rate': sample_rate,
        'words_prediction': words_prediction,
        'source': source,
        'defendant': defendant,
        'defendant_length': defendant_length,
        'label': label,
        'decoder_inputs': decoder_inputs,
        'loss_weights': loss_weights,
        'keep_prob': keep_prob,
    }
    return model
Пример #24
0
    def __init__(self,
                 config,
                 pretrained_embeddings=None,
                 update_embeddings=True,
                 is_training=False):
        """Builds the premise/hypothesis attention classifier graph.

        Args:
            config: Configuration object. This method reads batch_size,
                hidden_size, vocab_size, prem_steps, hyp_steps and, depending
                on the other arguments, embedding_size, learning_rate and
                max_grad_norm.
            pretrained_embeddings: Only tested against None here. When it is
                not None, the embedding variable gets shape
                [vocab_size, config.embedding_size], an assign op
                (self.embedding_init) fed through self.embedding_placeholder
                is created, and the looked-up inputs go through an input
                projection.
            update_embeddings: Whether the embedding variable is trainable
                when pretrained embeddings are used.
            is_training: When True, the learning-rate variable and the
                training op are created as well.
        """
        self.config = config
        self.batch_size = batch_size = config.batch_size
        self.hidden_size = hidden_size = config.hidden_size
        self.num_layers = 1
        self.vocab_size = config.vocab_size
        self.prem_steps = config.prem_steps
        self.hyp_steps = config.hyp_steps
        self.is_training = is_training
        # placeholders for inputs
        self.premise = tf.placeholder(tf.int32, [batch_size, self.prem_steps])
        self.hypothesis = tf.placeholder(tf.int32,
                                         [batch_size, self.hyp_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, 3])

        if pretrained_embeddings is not None:
            embedding = tf.get_variable(
                'embedding', [self.vocab_size, self.config.embedding_size],
                dtype=tf.float32,
                trainable=update_embeddings)
            # The pretrained values are loaded by running embedding_init with
            # embedding_placeholder fed.
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [self.vocab_size, self.config.embedding_size])
            self.embedding_init = embedding.assign(self.embedding_placeholder)
        else:
            embedding = tf.get_variable('embedding',
                                        [self.vocab_size, self.hidden_size],
                                        dtype=tf.float32)

        # create (batch, step, embedding dim) inputs for the models
        premise_inputs = tf.nn.embedding_lookup(embedding, self.premise)
        hypothesis_inputs = tf.nn.embedding_lookup(embedding, self.hypothesis)

        if pretrained_embeddings is not None:
            # Both sentences share the same projection variables.
            with tf.variable_scope("input_projection"):
                premise_inputs = input_projection3D(premise_inputs,
                                                    self.hidden_size)
            with tf.variable_scope("input_projection", reuse=True):
                hypothesis_inputs = input_projection3D(hypothesis_inputs,
                                                       self.hidden_size)

        # run FF networks over inputs (shared variables)
        with tf.variable_scope("FF"):
            prem_attn = self.feed_forward_attention(premise_inputs)
        with tf.variable_scope("FF", reuse=True):
            hyp_attn = self.feed_forward_attention(hypothesis_inputs)

        # This is doing all the dot-products for the feedforward attention at
        # once.
        # get activations, shape: (batch, prem_steps, hyp_steps)
        dot = tf.batch_matmul(prem_attn, hyp_attn, adj_y=True)

        hypothesis_softmax = tf.reshape(dot, [
            batch_size * self.prem_steps,
            -1,
        ])
        hypothesis_softmax = tf.expand_dims(tf.nn.softmax(hypothesis_softmax),
                                            2)

        # switch dimensions so the reshape below groups by hypothesis step
        dot = tf.transpose(dot, [0, 2, 1])

        premise_softmax = tf.reshape(
            dot, [batch_size * self.hyp_steps, -1])
        premise_softmax = tf.expand_dims(tf.nn.softmax(premise_softmax), 2)

        # We make a copy of the original input for each of the steps in the
        # opposite sentence, multiply with softmax weights, sum and reshape.
        # NOTE(review): tf.tile repeats the whole batch along axis 0, while
        # the softmax rows come from reshaping a batch-leading tensor; for
        # batch_size > 1 the two row orders look inconsistent -- TODO confirm.
        alphas = tf.reduce_sum(
            premise_softmax * tf.tile(premise_inputs, [self.hyp_steps, 1, 1]),
            [1])
        betas = tf.reduce_sum(
            hypothesis_softmax *
            tf.tile(hypothesis_inputs, [self.prem_steps, 1, 1]), [1])

        # this is a list of (batch, hidden dim) tensors of hyp_steps length.
        # Squeeze only the split axis: an axis-less squeeze would also drop
        # the batch dimension when batch_size == 1.
        alphas = [
            tf.squeeze(x, [1]) for x in tf.split(
                1, self.hyp_steps,
                tf.reshape(alphas, [batch_size, -1, self.hidden_size]))
        ]
        # this is a list of (batch, hidden dim) tensors of prem_steps length
        betas = [
            tf.squeeze(x, [1]) for x in tf.split(
                1, self.prem_steps,
                tf.reshape(betas, [batch_size, -1, self.hidden_size]))
        ]

        # list of original premise vecs to go with betas
        prem_list = [
            tf.squeeze(single_input, [1])
            for single_input in tf.split(1, self.prem_steps, premise_inputs)
        ]

        # list of original hypothesis vecs to go with alphas
        hyp_list = [
            tf.squeeze(single_input, [1])
            for single_input in tf.split(1, self.hyp_steps, hypothesis_inputs)
        ]

        # append the relevant alpha/beta to the original word representation
        beta_concat_prems = [
            tf.concat(1, [word_vec, rep])
            for word_vec, rep in zip(prem_list, betas)
        ]
        alpha_concat_hyps = [
            tf.concat(1, [word_vec, rep])
            for word_vec, rep in zip(hyp_list, alphas)
        ]

        # send both through a feedforward network with shared parameters;
        # all steps are stacked along axis 0 for one pass, then split again
        with tf.variable_scope("compare"):
            prem_comparison_vecs = tf.split(
                0, self.prem_steps,
                self.feedforward_network(tf.concat(0, beta_concat_prems)))

        with tf.variable_scope("compare", reuse=True):
            hyp_comparison_vecs = tf.split(
                0, self.hyp_steps,
                self.feedforward_network(tf.concat(0, alpha_concat_hyps)))

        # add representations and send through last classifier
        sum_prem_vec = tf.add_n(prem_comparison_vecs)
        sum_hyp_vec = tf.add_n(hyp_comparison_vecs)

        with tf.variable_scope("final_representation"):
            final_representation = self.feedforward_network(
                tf.concat(1, [sum_prem_vec, sum_hyp_vec]))

        # softmax over outputs to generate distribution over
        # [neutral, entailment, contradiction]
        softmax_w = tf.get_variable("softmax_w", [4 * hidden_size, 3])
        softmax_b = tf.get_variable("softmax_b", [3])
        self.logits = tf.matmul(final_representation,
                                softmax_w) + softmax_b  # dim (batch_size, 3)

        # Index of the largest entry in each target row
        # (presumably the rows are one-hot labels -- confirm with the caller).
        _, targets = tf.nn.top_k(self.targets)

        loss = seq2seq.sequence_loss_by_example([self.logits], [targets],
                                                [tf.ones([batch_size])], 3)
        self.cost = tf.reduce_mean(loss)

        _, logit_max_index = tf.nn.top_k(self.logits)

        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(logit_max_index, targets), tf.float32))

        if is_training:

            self.lr = tf.Variable(self.config.learning_rate, trainable=False)

            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              self.config.max_grad_norm)

            optimizer = tf.train.AdagradOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #25
0
    def __init__(self, config):
        """Build the RNN language-model graph described by config.

        Exits the process through sys.exit() when rnn_type is not one of the
        supported values.

        Args:
            config (dict): Model configuration. Keys read here:
                rnn_size: size of the RNN hidden state
                num_layers: number of stacked RNN layers
                rnn_type: "rnn", "gru" or "lstm"
                batch_size: batch size
                seq_length: sequence length
                grad_clip: threshold handed to tf.clip_by_global_norm
                vocab_size: size of the vocabulary
                infer: when True, batch_size and seq_length are forced to 1
                    and the predicted output is fed back to the RNN instead
                    of the gold target output
                is_train: when False, no learning rate or train_op is built
                reuse: passed as the reuse flag of the model variable scope
        """
        logger.info("Create model with options: \n{}".format(pprint.pformat(config)))

        # Every setting is stored on the instance under the name of its key.
        for key in ("rnn_size", "num_layers", "rnn_type", "batch_size",
                    "seq_length", "grad_clip", "vocab_size", "infer",
                    "is_train", "reuse"):
            setattr(self, key, config[key])

        if self.infer:
            self.batch_size = 1
            self.seq_length = 1

        if self.rnn_type == "rnn":
            cell_fn = rnn_cell.BasicRNNCell
        elif self.rnn_type == "gru":
            cell_fn = rnn_cell.GRUCell
        elif self.rnn_type == "lstm":
            cell_fn = rnn_cell.LSTMCell
        else:
            msg = "Rnn type should be either rnn, gru or lstm"
            logger.error(msg)
            sys.exit(msg)

        # One cell, repeated num_layers times, forms the multi-layer RNN.
        single_layer = cell_fn(self.rnn_size)
        stacked = rnn_cell.MultiRNNCell([single_layer] * self.num_layers)
        self.cell = stacked

        token_grid = [self.batch_size, self.seq_length]
        self.input_data = tf.placeholder(tf.int32, token_grid)
        self.targets = tf.placeholder(tf.int32, token_grid)
        self.initial_state = stacked.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope(MODEL_SCOPE, reuse=self.reuse):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, self.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [self.vocab_size])

            # The embedding variable and its lookup are placed on
            # DEVICE_SCOPE.
            with tf.device(DEVICE_SCOPE):
                embeddings = tf.get_variable("embeddings", [self.vocab_size, self.rnn_size])

                # Cut the looked-up batch along dimension 1 into a list of
                # per-step inputs of shape (batch_size, 1, rnn_size), then
                # squeeze away that unit dimension.
                embedded = tf.nn.embedding_lookup(embeddings, self.input_data)
                inputs = [
                    tf.squeeze(step_input, [1])
                    for step_input in tf.split(1, self.seq_length, embedded)
                ]

            # seq2seq.rnn_decoder replaces a hand-written network. At test
            # time the predicted output is fed back to the RNN instead of the
            # gold target output used during training.
            def feed_previous(prev_output, _):
                vocab_scores = tf.matmul(prev_output, softmax_w) + softmax_b
                # stop_gradient keeps the arg-max choice out of backprop.
                best_symbol = tf.stop_gradient(tf.argmax(vocab_scores, 1))
                return tf.nn.embedding_lookup(embeddings, best_symbol)

            loop_function = feed_previous if self.infer else None
            outputs, last_state = seq2seq.rnn_decoder(
                inputs, self.initial_state, stacked,
                loop_function=loop_function, scope=MODEL_SCOPE)

            # Flatten to (batch_size x seq_length) x rnn_size.
            flat_output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])
            self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(self.logits)

            flat_targets = tf.reshape(self.targets, [-1])
            unit_weights = tf.ones([self.batch_size * self.seq_length])
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [flat_targets], [unit_weights])
            self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)
            self.final_state = last_state

            # Nothing below is needed unless the model is being trained.
            if not self.is_train:
                return

            self.lr = tf.Variable(0.0, trainable=False)
            trainable = tf.trainable_variables()
            clipped, _ = tf.clip_by_global_norm(
                tf.gradients(self.cost, trainable), self.grad_clip)

            self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
                zip(clipped, trainable))
    def __init__(self, args, embedding):
        """Builds the 'STAND' RNN graph on top of a given embedding.

        Two decoders sharing the same variables are created: one that reads
        the provided inputs at every step (self.logits / self.probs) and one
        that feeds its own arg-max prediction back in as the next input
        (self.self_feed_probs). Only the first one enters the loss.

        Args:
            args: Hyper-parameter object. This method reads model, rnn_size,
                num_layers, batch_size, seq_length, vocab_size and grad_clip.
            embedding: Embedding used to look up the input ids; it is stored
                as self.embedding.

        Raises:
            Exception: If args.model is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length],
                                         name='STAND_input')
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length],
                                      name='STAND_targets')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        self.embedding = embedding
        with tf.variable_scope('STAND'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            inputs = tf.split(
                1, args.seq_length,
                tf.nn.embedding_lookup(self.embedding, self.input_data))
            squeezed = [tf.squeeze(input_, [1]) for input_ in inputs]
            # Must be a real list: it is consumed by both decoders below,
            # and map() would hand back a one-shot iterator on Python 3.
            inputs = [tf.nn.l2_normalize(step, 1) for step in squeezed]

        def loop(prev, i):
            """Returns the normalized embedding of the arg-max prediction."""
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.l2_normalize(
                tf.nn.embedding_lookup(embedding, prev_symbol), 1)

        # Decoder driven by the provided inputs.
        o, _ = seq2seq.rnn_decoder(inputs,
                                   self.initial_state,
                                   cell,
                                   loop_function=None,
                                   scope='STAND')
        # Self-feeding decoder that reuses the variables created above.
        with tf.variable_scope('STAND', reuse=True) as scope:
            sf_o, _ = seq2seq.rnn_decoder(inputs,
                                          self.initial_state,
                                          cell,
                                          loop_function=loop,
                                          scope=scope)
        output = tf.reshape(tf.concat(1, o), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        sf_output = tf.reshape(tf.concat(1, sf_o), [-1, args.rnn_size])
        self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
        self.self_feed_probs = tf.nn.softmax(self_feed_logits)

        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          args.grad_clip)
        # Lists the variables being trained; the call form of print behaves
        # the same on Python 2 and Python 3 for a single argument.
        for v in tvars:
            print(v.name)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #27
0
    def __init__(self,
                 config,
                 pretrained_embeddings=None,
                 update_embeddings=True,
                 is_training=False):
        """Build the attention-based premise/hypothesis classification graph.

        @param config: Hyper-parameter object. This constructor reads
            batch_size, hidden_size, vocab_size, prem_steps, hyp_steps,
            embedding_size (only with pretrained embeddings), inference_size,
            and, when training, learning_rate and max_grad_norm.
        @param pretrained_embeddings: Only tested against None here. When it
            is not None the embedding matrix has config.embedding_size columns,
            is loaded through self.embedding_placeholder / self.embedding_init,
            and the looked-up inputs are projected with input_projection3D.
        @param update_embeddings: Whether the pretrained embedding variable is
            trainable.
        @param is_training: When true, also builds the Adam training op with
            global-norm gradient clipping.
        """
        self.config = config
        self.batch_size = batch_size = config.batch_size
        self.hidden_size = config.hidden_size
        self.num_layers = 1
        self.vocab_size = config.vocab_size
        self.prem_steps = config.prem_steps
        self.hyp_steps = config.hyp_steps
        self.is_training = is_training

        # Placeholders for the token ids of both sentences and the targets.
        self.premise = tf.placeholder(tf.int32, [batch_size, self.prem_steps])
        self.hypothesis = tf.placeholder(tf.int32,
                                         [batch_size, self.hyp_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, 3])

        use_pretrained = pretrained_embeddings is not None
        if use_pretrained:
            embedding = tf.get_variable(
                'embedding', [self.vocab_size, self.config.embedding_size],
                dtype=tf.float32,
                trainable=update_embeddings)
            # The caller feeds the pretrained matrix into the placeholder and
            # runs embedding_init to copy it into the variable.
            self.embedding_placeholder = tf.placeholder(
                tf.float32, [self.vocab_size, self.config.embedding_size])
            self.embedding_init = embedding.assign(self.embedding_placeholder)
        else:
            embedding = tf.get_variable('embedding',
                                        [self.vocab_size, self.hidden_size],
                                        dtype=tf.float32)

        # Embedded inputs, one vector per (example, step).
        premise_inputs = tf.nn.embedding_lookup(embedding, self.premise)
        hypothesis_inputs = tf.nn.embedding_lookup(embedding, self.hypothesis)

        if use_pretrained:
            # Both sentences share the same projection variables.
            with tf.variable_scope("input_projection"):
                premise_inputs = input_projection3D(premise_inputs,
                                                    self.hidden_size)
            with tf.variable_scope("input_projection", reuse=True):
                hypothesis_inputs = input_projection3D(hypothesis_inputs,
                                                       self.hidden_size)

        # Run the (shared) feed-forward attention network over both inputs.
        with tf.variable_scope("FF"):
            prem_attn = self.feed_forward_attention(premise_inputs)
        with tf.variable_scope("FF", reuse=True):
            hyp_attn = self.feed_forward_attention(hypothesis_inputs)

        # Alignment scores, shape (batch, prem_steps, hyp_steps).
        dot = tf.batch_matmul(prem_attn, hyp_attn, adj_y=True)

        # Softmax over hypothesis positions for every premise position. As in
        # the original code the softmax runs on a 2-D view; the result is
        # reshaped back to (batch, prem_steps, hyp_steps).
        hypothesis_softmax = tf.reshape(
            tf.nn.softmax(
                tf.reshape(dot, [batch_size * self.prem_steps, -1])),
            [batch_size, self.prem_steps, self.hyp_steps])

        # Softmax over premise positions for every hypothesis position,
        # shape (batch, hyp_steps, prem_steps).
        premise_softmax = tf.reshape(
            tf.nn.softmax(
                tf.reshape(
                    tf.transpose(dot, [0, 2, 1]),
                    [batch_size * self.hyp_steps, -1])),
            [batch_size, self.hyp_steps, self.prem_steps])

        # Attention-weighted sums of the opposite sentence.
        # NOTE: the previous implementation multiplied the batch-major softmax
        # rows (row = example * steps + step) with
        # tf.tile(inputs, [steps, 1, 1]), whose rows cycle through the batch
        # (row -> example row % batch_size), so weights and inputs of
        # different examples were mixed. A batched matmul keeps every example
        # paired with its own weights.
        # alphas: (batch, hyp_steps, hidden); betas: (batch, prem_steps, hidden)
        alphas = tf.batch_matmul(premise_softmax, premise_inputs)
        betas = tf.batch_matmul(hypothesis_softmax, hypothesis_inputs)

        # Pair every original step vector with its attended counterpart by
        # concatenating along the feature axis (equivalent to the former
        # split / per-step concat / re-stack sequence).
        prem_comparison_vecs = tf.concat(2, [premise_inputs, betas])
        hyp_comparison_vecs = tf.concat(2, [hypothesis_inputs, alphas])

        with tf.variable_scope("gru_inference"):
            inference = rnn_cell.GRUCell(self.config.inference_size)
            self.inference_cell = rnn_cell.MultiRNNCell([inference] *
                                                        self.num_layers)
            self.inference_state = self.inference_cell.zero_state(
                self.batch_size, tf.float32)

        with tf.variable_scope("inference"):
            final_representation, _, self.iterations = self.do_inference_steps(
                self.inference_state, prem_comparison_vecs,
                hyp_comparison_vecs)

        # Softmax layer producing a distribution over
        # [neutral, entailment, contradiction].
        softmax_w = tf.get_variable("softmax_w",
                                    [self.config.inference_size, 3])
        softmax_b = tf.get_variable("softmax_b", [3])
        self.logits = tf.matmul(final_representation,
                                softmax_w) + softmax_b  # dim (batch_size, 3)

        # Index of the largest entry of each target row.
        _, targets = tf.nn.top_k(self.targets)

        loss = seq2seq.sequence_loss_by_example([self.logits], [targets],
                                                [tf.ones([self.batch_size])],
                                                3)
        self.cost = tf.reduce_mean(loss)

        _, logit_max_index = tf.nn.top_k(self.logits)

        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(logit_max_index, targets), tf.float32))

        self.per_step_accs, self.per_step_dists = self.evaluate_representation(
        )

        if is_training:
            self.lr = tf.Variable(self.config.learning_rate, trainable=False)

            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                              self.config.max_grad_norm)

            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #28
0
def main():
    """Train a character-level stacked LSTM on a text file, or sample from it.

    Behaviour is driven by the module-level ``FLAGS``: batch_size,
    epoch_number, mode ("train" or "inference"), model, optimizer,
    learning_rate, checkpoint_dir, tensorboard_dir, steps_to_validate and
    inference_start_word.

    In "train" mode a single fixed batch (the first batch_size windows of the
    corpus) is fed on every iteration and a checkpoint is written every
    steps_to_validate iterations. In "inference" mode the graph is built for
    one character at a time and 100 characters are generated greedily.
    """
    print("Start generating lycrics")

    # Hyper-parameters.
    batch_size = FLAGS.batch_size
    epoch_number = FLAGS.epoch_number
    sequence_length = 20
    rnn_hidden_units = 100
    stacked_layer_number = 3

    # TODO: Use python 3 for encoding for Chinese
    # An alternative corpus is "./data/jay_lyrics.txt".
    lyrics_filepath = "./data/shakespeare.txt"
    import codecs
    # Context manager so the corpus file is closed once it has been read.
    with codecs.open(lyrics_filepath, encoding='utf-8') as f:
        lyrics_data = f.read()

    # Character vocabulary: sorted unique characters and both lookup maps.
    words = sorted(set(lyrics_data))
    vocabulary_size = len(words)
    id_char_map = dict(enumerate(words))
    char_id_map = {char: index for index, char in enumerate(words)}

    # One training batch: consecutive windows of the corpus; the labels are
    # the same windows shifted by one character.
    train_dataset = []
    train_labels = []
    for row in range(batch_size):
        start = row * sequence_length
        features = lyrics_data[start:start + sequence_length]
        labels = lyrics_data[start + 1:start + sequence_length + 1]
        train_dataset.append([char_id_map[char] for char in features])
        train_labels.append([char_id_map[char] for char in labels])

    # Define the model
    mode = FLAGS.mode

    if mode == "inference":
        # Sampling feeds one character of one sequence per step.
        batch_size = 1
        sequence_length = 1

    x = tf.placeholder(tf.int32, shape=(None, sequence_length))
    y = tf.placeholder(tf.int32, shape=(None, sequence_length))
    checkpoint_dir = FLAGS.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    tensorboard_dir = FLAGS.tensorboard_dir

    checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
    steps_to_validate = FLAGS.steps_to_validate

    def lstm_inference(x):
        # Previously an empty stub returning None, which made the tuple
        # unpacking of inference()'s result fail with an opaque TypeError.
        raise NotImplementedError(
            "The 'lstm' model is not implemented, use 'stacked_lstm'")

    def stacked_lstm_inference(x):
        """Return (logits, cell, initial_state, last_state) for ids ``x``."""
        lstm_cell = rnn_cell.BasicLSTMCell(rnn_hidden_units)
        lstm_cells = rnn_cell.MultiRNNCell([lstm_cell] * stacked_layer_number)
        initial_state = lstm_cells.zero_state(batch_size, tf.float32)

        with tf.variable_scope("stacked_lstm"):
            weights = tf.get_variable("weights",
                                      [rnn_hidden_units, vocabulary_size])
            bias = tf.get_variable("bias", [vocabulary_size])
            embedding = tf.get_variable("embedding",
                                        [vocabulary_size, rnn_hidden_units])

        inputs = tf.nn.embedding_lookup(embedding, x)
        outputs, last_state = tf.nn.dynamic_rnn(lstm_cells,
                                                inputs,
                                                initial_state=initial_state)

        # Flatten all time steps so one matmul yields per-character logits.
        output = tf.reshape(outputs, [-1, rnn_hidden_units])
        logits = tf.add(tf.matmul(output, weights), bias)

        return logits, lstm_cells, initial_state, last_state

    def inference(inputs):
        print("Use the model: {}".format(FLAGS.model))
        if FLAGS.model == "lstm":
            return lstm_inference(inputs)
        elif FLAGS.model == "stacked_lstm":
            return stacked_lstm_inference(inputs)
        else:
            print("Unknow model, exit now")
            exit(1)

    # Define train op
    logits, lstm_cells, initial_state, last_state = inference(x)

    targets = tf.reshape(y, [-1])
    loss = seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])
    loss = tf.reduce_sum(loss)

    predict_softmax = tf.nn.softmax(logits)

    learning_rate = FLAGS.learning_rate
    print("Use the optimizer: {}".format(FLAGS.optimizer))
    if FLAGS.optimizer == "sgd":
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif FLAGS.optimizer == "adadelta":
        optimizer = tf.train.AdadeltaOptimizer(learning_rate)
    elif FLAGS.optimizer == "adagrad":
        optimizer = tf.train.AdagradOptimizer(learning_rate)
    elif FLAGS.optimizer == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate)
    elif FLAGS.optimizer == "ftrl":
        optimizer = tf.train.FtrlOptimizer(learning_rate)
    elif FLAGS.optimizer == "rmsprop":
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
    else:
        print("Unknow optimizer: {}, exit now".format(FLAGS.optimizer))
        exit(1)

    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)

    saver = tf.train.Saver()
    tf.scalar_summary('loss', loss)
    init_op = tf.initialize_all_variables()

    # Create session to run graph
    with tf.Session() as sess:
        # NOTE: the merged summary op is built but never evaluated; the
        # writer is only used to record the graph.
        summary_op = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter(tensorboard_dir, sess.graph)
        sess.run(init_op)

        if mode == "train":
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("Continue training from the model {}".format(
                    ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)

            start_time = datetime.datetime.now()
            for epoch in range(epoch_number):

                _, loss_value, step = sess.run([train_op, loss, global_step],
                                               feed_dict={
                                                   x: train_dataset,
                                                   y: train_labels
                                               })

                if epoch % steps_to_validate == 0:
                    end_time = datetime.datetime.now()

                    print("[{}] Epoch: {}, loss: {}".format(
                        end_time - start_time, epoch, loss_value))

                    saver.save(sess, checkpoint_file, global_step=step)
                    start_time = end_time

        elif mode == "inference":
            # If no checkpoint is found, sampling continues with the freshly
            # initialised variables.
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print("Load the model {}".format(ckpt.model_checkpoint_path))
                saver.restore(sess, ckpt.model_checkpoint_path)

            start_time = datetime.datetime.now()

            word = FLAGS.inference_start_word
            generate_word_number = 100
            generate_lyrics = word

            state = sess.run(lstm_cells.zero_state(1, tf.float32))

            for _ in range(generate_word_number):
                x2 = np.zeros((1, 1))
                x2[0, 0] = char_id_map[word]

                # Feed the previous state back in so the context carries over
                # between single-character steps.
                prediction, state = sess.run([predict_softmax, last_state],
                                             feed_dict={
                                                 x: x2,
                                                 initial_state: state
                                             })
                # Greedy decoding: always take the most probable character.
                predict_word_id = np.argmax(prediction[0])

                word = id_char_map[predict_word_id]
                generate_lyrics += word

            end_time = datetime.datetime.now()
            print("[{}] Generated lyrics:\n{}".format(end_time - start_time,
                                                      generate_lyrics))

        else:
            print("Unknow mode, please choose 'train' or 'inference'")

    print("End of generating lycrics")
Пример #29
0
    def __init__(self, args, infer=False):
        """Build an attention seq2seq graph over integer token sequences.

        The ids in ``input_data`` are split per time step and passed as both
        the encoder and the decoder inputs of
        ``seq2seq.embedding_attention_seq2seq``; the returned outputs are used
        directly as logits.

        @param args: Hyper-parameter namespace. Reads model, rnn_size,
            num_layers, batch_size, seq_length, vocab_size and grad_clip.
            NOTE: when ``infer`` is true, batch_size and seq_length are
            overwritten with 1 on this very object.
        @param infer: Build the graph for a single example and a single step.
        @raise Exception: if args.model is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("模型不支持:{}".format(args.model))

        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # tf.get_variable inside a variable scope is used so the variables can
        # be shared: unlike tf.Variable, which always creates a new variable,
        # get_variable can hand back an existing variable of the same name.
        # These two variables are not used by the current output path (the
        # logits below are the raw seq2seq outputs); they are still created so
        # the set of graph variables stays unchanged.
        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        # One tensor of shape (batch_size,) per time step.
        inputs = tf.split(1, args.seq_length, self.input_data)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # The same sequence is fed to the encoder and the decoder; both symbol
        # counts are vocab_size and the embedding size is rnn_size.
        outputs, last_state = seq2seq.embedding_attention_seq2seq(
            inputs, inputs, cell, args.vocab_size, args.vocab_size,
            args.rnn_size)

        self.saved_outputs = outputs

        # Concatenate the per-step outputs and flatten them to rows of
        # vocab_size scores.
        output = tf.reshape(tf.concat(1, outputs), [-1, args.vocab_size])

        # No extra fully connected layer: the decoder outputs are the logits.
        self.logits = output
        self.probs = tf.nn.softmax(self.logits)

        # sequence_loss_by_example is called with:
        #   logits  - [batch_size * seq_length, vocab_size]
        #   targets - flattened to [batch_size * seq_length]
        #   weights - tf.ones([batch_size * seq_length])
        # A walk-through of these shapes:
        # https://blog.csdn.net/xyz1584172808/article/details/83056179
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        # The learning rate is a non-trainable variable so it can be assigned
        # from outside the graph.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)

        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #30
0
    def __init__(self, args):
        """Build an RNN graph mapping dense input vectors to output symbols.

        @param args: Hyper-parameter namespace. Reads model, rnn_size,
            num_layers, batch_size, seq_length, char_size (input feature
            width), phvocab_size (number of output classes) and grad_clip.
        @raise Exception: if args.model is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        # Input data has shape (batch_size, seq_length, char_size).
        self.input_data = tf.placeholder(tf.float32,
                                         [args.batch_size,
                                          args.seq_length,
                                          args.char_size])

        # Target data has shape (batch_size, seq_length); the number of
        # output classes is not specified here.
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size,
                                       args.seq_length])

        # All-zero initial state for a batch of batch_size sequences.
        self.initial_state = cell.zero_state(args.batch_size,
                                             tf.float32)

        # Final softmax layer; this is where the output dimension
        # (phvocab_size) is fixed.
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size,
                                         args.phvocab_size])
            softmax_b = tf.get_variable("softmax_b",
                                        [args.phvocab_size])

            # Unroll the input into seq_length tensors and drop the
            # singleton time axis left by the split.
            inputs = tf.split(1, args.seq_length, self.input_data)
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Plain RNN decoder (no attention). last_state is the state after the
        # final step of the sequence.
        outputs, last_state = seq2seq.rnn_decoder(inputs,
                                                  self.initial_state,
                                                  cell,
                                                  scope='rnnlm')

        # outputs holds one tensor per time step; concatenate them and
        # flatten to rows of rnn_size features.
        outconcat = tf.concat(1, outputs)
        output = tf.reshape(outconcat, [-1, args.rnn_size])

        # Final logit layer: x * W + b.
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        # Cost function.
        # NOTE: this assignment used to end with a stray comma, which turned
        # reshaped_target into a 1-tuple instead of a tensor before it was
        # handed to sequence_loss_by_example.
        reshaped_target = tf.reshape(self.targets, [-1])
        seq_weight = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [reshaped_target],
                                                [seq_weight],
                                                args.phvocab_size)

        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state

        # Adam optimizer with global-norm gradient clipping. The learning
        # rate is a non-trainable variable so it can be assigned externally.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost,
                                                       tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #31
0
    def __init__(self, args, infer=False):
        """Assemble the recurrent language-model graph with summaries.

        @param args: Hyper-parameter namespace (model, rnn_size, num_layers,
            batch_size, seq_length, vocab_size, grad_clip). When ``infer`` is
            true its batch_size and seq_length are set to 1 in place.
        @param infer: Build a one-step graph that feeds each prediction back
            in as the next input.
        @raise Exception: for a model type other than 'rnn', 'gru', 'lstm'.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # Map the model name onto its cell constructor.
        cell_types = {
            'rnn': rnn_cell.BasicRNNCell,
            'gru': rnn_cell.GRUCell,
            'lstm': rnn_cell.BasicLSTMCell,
        }
        if args.model not in cell_types:
            raise Exception("model type not supported: {}".format(args.model))
        cell_fn = cell_types[args.model]

        stacked = rnn_cell.MultiRNNCell([cell_fn(args.rnn_size)] *
                                        args.num_layers)
        self.cell = stacked

        token_shape = [args.batch_size, args.seq_length]
        self.input_data = tf.placeholder(tf.int32, token_shape)
        self.targets = tf.placeholder(tf.int32, token_shape)
        self.initial_state = stacked.zero_state(args.batch_size, tf.float32)

        # Bookkeeping variables stored in the graph next to the weights.
        self.batch_pointer = tf.Variable(0,
                                         name="batch_pointer",
                                         trainable=False,
                                         dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer,
                                              self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0,
                                         name="epoch_pointer",
                                         trainable=False)
        self.batch_time = tf.Variable(0.0,
                                      name="batch_time",
                                      trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Record mean, max and min scalar summaries for ``var``."""
            with tf.name_scope('summaries'):
                tf.summary.scalar('mean', tf.reduce_mean(var))
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One tensor per time step, without the singleton time axis.
                inputs = [
                    tf.squeeze(step, [1])
                    for step in tf.split(1, args.seq_length, embedded)
                ]

        def loop(prev, _):
            # Project the previous output to vocabulary scores and embed the
            # arg-max symbol; no gradient flows through that choice.
            scores = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        # Predictions are only fed back when sampling.
        feed_previous = loop if infer else None
        outputs, last_state = seq2seq.rnn_decoder(inputs,
                                                  self.initial_state,
                                                  stacked,
                                                  loop_function=feed_previous,
                                                  scope='rnnlm')

        flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [flat_targets],
                                                [unit_weights],
                                                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state

        # Adam on globally clipped gradients; the learning rate variable is
        # non-trainable so it can be assigned from outside.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(clipped, trainable))
Пример #32
0
    def build_graph(self, test):
        """
        Construct the stacked-LSTM language-model graph in TensorFlow.

        @param test: When true, self.batch_size and self.seq_len are set to 1
                     and every prediction is fed back as the next input.
        """
        if test:
            self.batch_size = 1
            self.seq_len = 1

        # Stack of num_layers LSTM cells.
        self.cell = rnn_cell.MultiRNNCell(
            [rnn_cell.BasicLSTMCell(self.cell_size)] * self.num_layers)

        # Token-id inputs and targets, both (batch_size, seq_len).
        ids_shape = [self.batch_size, self.seq_len]
        self.inputs = tf.placeholder(tf.int32, ids_shape)
        self.targets = tf.placeholder(tf.int32, ids_shape)
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('lstm_vars'):
            # Output projection weights and bias.
            self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
            self.bs = tf.get_variable('bs',
                                      [self.vocab_size])  # TODO: initializer?
            # The embedding table and its lookup are pinned to the CPU device.
            with tf.device('/cpu:0'):
                self.embeddings = tf.get_variable(
                    'embeddings', [self.vocab_size, self.cell_size])

                embedded = tf.nn.embedding_lookup(self.embeddings,
                                                  self.inputs)
                # Split along the time axis into seq_len tensors and squeeze
                # away the singleton axis, giving a list whose k-th entry
                # holds the embedding of word k for every batch element.
                step_inputs = [
                    tf.squeeze(piece, [1])
                    for piece in tf.split(1, self.seq_len, embedded)
                ]

        def loop(prev, _):
            # Turn the previous output into vocabulary scores, take the
            # arg-max symbol (no gradient through the choice) and embed it.
            scores = tf.matmul(prev, self.ws) + self.bs
            best_symbol = tf.stop_gradient(tf.argmax(scores, 1))
            return tf.nn.embedding_lookup(self.embeddings, best_symbol)

        feed_previous = loop if test else None
        step_outputs, self.final_state = seq2seq.rnn_decoder(
            step_inputs,
            self.initial_state,
            self.cell,
            loop_function=feed_previous,
            scope='lstm_vars')

        # step_outputs is a list with one tensor per time step. After the
        # concat and reshape every row is one output vector of cell_size
        # features, ordered batch element by batch element:
        #   [elt 0, step 0], ..., [elt 0, step seq_len - 1],
        #   [elt 1, step 0], ..., [elt batch_size - 1, step seq_len - 1]
        flat_outputs = tf.reshape(tf.concat(1, step_outputs),
                                  [-1, self.cell_size])

        logits = tf.matmul(flat_outputs, self.ws) + self.bs
        self.probs = tf.nn.softmax(logits)

        # Training: equally weighted per-token loss, averaged over all tokens.
        flat_targets = tf.reshape(self.targets, [-1])
        token_weights = tf.ones([self.batch_size * self.seq_len])
        per_token_loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [token_weights], self.vocab_size)
        self.loss = (tf.reduce_sum(per_token_loss) / self.batch_size /
                     self.seq_len)

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE,
                                                name='optimizer')
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step,
                                                name='train_op')
Пример #33
0
    def __init__(self, args, infer=False):
        """
        Build the graph of a recurrent language model.

        Construction happens in three stages:

        1. Pick the recurrent unit (plain RNN, GRU or LSTM, selected by
           ``args.rnncell``) and stack ``args.num_layers`` of them in a
           ``MultiRNNCell``.
        2. Embed the integer inputs, split them into one tensor per time
           step (the list format the seq2seq decoders take) and run them
           through ``seq2seq.rnn_decoder`` or, when ``args.attention`` is
           set, ``seq2seq.attention_decoder``; this yields the per-step
           outputs and the last hidden state.
        3. Project the outputs to vocabulary logits, apply softmax, compute
           the loss and create a gradient-clipped Adam training op.

        :param args: hyper-parameter object. Attributes read here: rnncell,
            rnn_size, num_layers, batch_size, seq_length, vocab_size,
            embedding_size, attention, grad_clip and log_dir.
        :param infer: when true, ``args.batch_size`` and ``args.seq_length``
            are overwritten with 1 (on the caller's object) and the decoder
            is given a loop function that feeds its own arg-max prediction
            back in as the next input.
        :raises Exception: if ``args.rnncell`` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            # Generation consumes a single token of a single sequence at a
            # time.  Note that this mutates the caller's ``args``.
            args.batch_size = 1
            args.seq_length = 1

        # Resolve the cell class lazily so only the requested one is touched.
        supported_cells = (('rnn', 'BasicRNNCell'),
                           ('gru', 'GRUCell'),
                           ('lstm', 'BasicLSTMCell'))
        for kind, class_name in supported_cells:
            if args.rnncell == kind:
                cell_fn = getattr(rnn_cell, class_name)
                break
        else:
            raise Exception("rnncell type not supported: {}".format(
                args.rnncell))

        base_cell = cell_fn(args.rnn_size)
        self.cell = rnn_cell.MultiRNNCell([base_cell] * args.num_layers)

        self.input_data = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, shape=[args.batch_size, args.seq_length])
        # The rnn_cell module provides the state initialiser.
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            proj_w = build_weight([args.rnn_size, args.vocab_size],
                                  name='soft_w')
            proj_b = build_weight([args.vocab_size], name='soft_b')
            # The embedding matrix is created with build_weight like the
            # other parameters; the lookup is done by hand because the
            # decoders take already-embedded inputs.
            embedding_matrix = build_weight(
                [args.vocab_size, args.embedding_size], name='word_embedding')
            embedded = tf.nn.embedding_lookup(embedding_matrix,
                                              self.input_data)
            # The decoders expect a list with one tensor per time step, so
            # the list length equals seq_length.
            step_slices = tf.split(1, args.seq_length, embedded)
            inputs_list = [tf.squeeze(step, [1]) for step in step_slices]

        def feed_back(prev_output, _):
            # Project the previous output to logits, pick the arg-max symbol
            # (no gradient through the choice) and return its embedding.
            step_logits = tf.matmul(prev_output, proj_w) + proj_b
            best_symbol = tf.stop_gradient(tf.argmax(step_logits, 1))
            return tf.nn.embedding_lookup(embedding_matrix, best_symbol)

        # While generating, the output of the previous step has to become
        # the input of the next one; while training the real inputs are used.
        loop_function = feed_back if infer else None

        # Build the decoder: attention_decoder or plain rnn_decoder.
        if args.attention:
            self.attn_length = 5
            self.attn_size = 32
            self.attention_states = build_weight(
                [args.batch_size, self.attn_length, self.attn_size])
            outputs, last_state = seq2seq.attention_decoder(
                inputs_list,
                self.initial_state,
                self.attention_states,
                self.cell,
                loop_function=loop_function,
                scope='rnnlm')
        else:
            # rnn_decoder takes four main arguments: the per-step input
            # list, the initial cell state, the cell itself and the
            # (optional) loop function defined above.
            # ``outputs`` holds the output of every time step and
            # ``last_state`` the state of the cells after the last step;
            # the former feeds the objective, the latter is what sampling
            # uses as the state for its next step.
            outputs, last_state = seq2seq.rnn_decoder(
                inputs_list,
                self.initial_state,
                self.cell,
                loop_function=loop_function,
                scope='rnnlm')

        self.final_state = last_state

        # Push the outputs through a feed-forward layer and a softmax.
        flat_output = tf.reshape(tf.concat(1, outputs), [-1, args.rnn_size])
        self.logits = tf.matmul(flat_output, proj_w) + proj_b
        self.probs = tf.nn.softmax(self.logits)

        # sequence_loss_by_example computes the weighted cross-entropy of
        # every word of the sequence (the log-perplexity per sequence);
        # all weights are 1 here.
        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], args.vocab_size)

        # average loss for each word of each timestep
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

        self.lr = tf.Variable(0.0, trainable=False)
        self.var_trainable_op = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, self.var_trainable_op)
        grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)

        # train_op is the op to run for one training step.
        self.train_op = optimizer.apply_gradients(
            zip(grads, self.var_trainable_op))
        self.initial_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=5,
                                    keep_checkpoint_every_n_hours=1)

        # Log file name: log_dir + timestamp without blanks/slashes + '.txt'.
        log_dir = args.log_dir
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        file_name = str(stamp + '.txt').replace(' ', '').replace('/', '')
        self.logfile = log_dir + file_name
        self.var_op = tf.global_variables()
Пример #34
0
    def __init__(self, config, pretrained_embeddings=None,
                 update_embeddings=True, is_training=False):
        """
        Build the premise/hypothesis classification graph.

        Both sentences are embedded, optionally encoded with GRUs
        (bidirectional when ``config.bidirectional`` is set), handed to
        ``self.do_act_steps`` and classified with a 3-way softmax layer.
        Loss, accuracy and -- when training -- a gradient-clipped Adam op
        are created on top.

        :param config: hyper-parameter object. Attributes read here:
            bidirectional, batch_size, hidden_size, vocab_size, prem_steps,
            hyp_steps, embedding_size, no_cell, encoder_size, step_penalty,
            embedding_reg, learning_rate and max_grad_norm.
        :param pretrained_embeddings: only compared against None here. When
            it is not None the embedding variable is sized with
            ``config.embedding_size``, a placeholder/assign pair
            (``embedding_placeholder`` / ``embedding_init``) is created for
            loading values, and the looked-up inputs are projected to
            ``hidden_size``.
        :param update_embeddings: passed as ``trainable`` for the embedding
            variable in the pretrained case; also gates the embedding
            regulariser.
        :param is_training: when true, ``self.lr`` and ``self.train_op`` are
            created.
        """
        self.config = config
        self.bidirectional = config.bidirectional
        self.batch_size = batch_size = config.batch_size
        self.hidden_size = config.hidden_size
        self.num_layers = 1
        self.vocab_size = config.vocab_size
        self.prem_steps = config.prem_steps
        self.hyp_steps = config.hyp_steps
        self.is_training = is_training

        # placeholders for inputs
        self.premise = tf.placeholder(tf.int32, [batch_size, self.prem_steps])
        self.hypothesis = tf.placeholder(tf.int32,
                                         [batch_size, self.hyp_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, 3])

        use_pretrained = pretrained_embeddings is not None
        if use_pretrained:
            embedding_shape = [self.vocab_size, self.config.embedding_size]
            embedding = tf.get_variable('embedding', embedding_shape,
                                        dtype=tf.float32,
                                        trainable=update_embeddings)
            # Running embedding_init with embedding_placeholder fed assigns
            # the fed matrix to the embedding variable.
            self.embedding_placeholder = tf.placeholder(tf.float32,
                                                        embedding_shape)
            self.embedding_init = embedding.assign(self.embedding_placeholder)
        else:
            embedding = tf.get_variable('embedding',
                                        [self.vocab_size, self.hidden_size],
                                        dtype=tf.float32)

        # embed the token ids of both sentences
        premise_inputs = tf.nn.embedding_lookup(embedding, self.premise)
        hypothesis_inputs = tf.nn.embedding_lookup(embedding, self.hypothesis)

        if use_pretrained:
            # Both sentences go through the same projection: the second
            # scope re-opens "input_projection" with reuse=True.
            with tf.variable_scope("input_projection"):
                premise_inputs = input_projection3D(premise_inputs,
                                                    self.hidden_size)
            with tf.variable_scope("input_projection", reuse=True):
                hypothesis_inputs = input_projection3D(hypothesis_inputs,
                                                       self.hidden_size)

        if self.config.no_cell:
            # No recurrent encoder: use the embedded inputs directly.
            hyp_outputs = hypothesis_inputs
            premise_outputs = premise_inputs
        else:
            def split_steps(batch_major, num_steps):
                # Split along axis 1 into num_steps pieces and drop that axis.
                slices = tf.split(1, num_steps, batch_major)
                return [tf.squeeze(one_step, [1]) for one_step in slices]

            def stacked_gru(scope_name):
                # GRU cell wrapped in a MultiRNNCell, built under scope_name.
                with tf.variable_scope(scope_name):
                    gru = rnn_cell.GRUCell(self.config.encoder_size)
                    return rnn_cell.MultiRNNCell([gru] * self.num_layers)

            def join_steps(step_outputs):
                # Inverse of split_steps: re-insert axis 1 and concatenate.
                expanded = [tf.expand_dims(out, 1) for out in step_outputs]
                return tf.concat(1, expanded)

            premise_steps = split_steps(premise_inputs, self.prem_steps)
            hypothesis_steps = split_steps(hypothesis_inputs, self.hyp_steps)

            # run GRUs over premise + hypothesis
            self.prem_cell_f = stacked_gru("premise_f")
            self.prem_cell_b = stacked_gru("premise_b")
            if self.bidirectional:
                premise_outputs, _, _ = rnn.bidirectional_rnn(
                    self.prem_cell_f, self.prem_cell_b, premise_steps,
                    dtype=tf.float32, scope="gru_premise")
            else:
                premise_outputs, _ = rnn.rnn(
                    self.prem_cell_f, premise_steps,
                    dtype=tf.float32, scope="gru_premise")
            premise_outputs = join_steps(premise_outputs)

            self.hyp_cell_f = stacked_gru("hypothesis_f")
            self.hyp_cell_b = stacked_gru("hypothesis_b")
            if self.bidirectional:
                hyp_outputs, _, _ = rnn.bidirectional_rnn(
                    self.hyp_cell_f, self.hyp_cell_b, hypothesis_steps,
                    dtype=tf.float32, scope="gru_hypothesis")
            else:
                hyp_outputs, _ = rnn.rnn(
                    self.hyp_cell_f, hypothesis_steps,
                    dtype=tf.float32, scope="gru_hypothesis")
            hyp_outputs = join_steps(hyp_outputs)

        with tf.variable_scope("prediction"):
            prediction, stopping_probs, iterations = self.do_act_steps(
                premise_outputs, hyp_outputs)

        # make it easy to get this info out of the model later
        self.remainder = 1.0 - stopping_probs
        self.iterations = iterations

        # softmax over outputs to generate distribution over
        # [neutral, entailment, contradiction]
        # NOTE(review): self.rep_size is not assigned in this constructor;
        # presumably do_act_steps sets it -- confirm.
        softmax_w = tf.get_variable("softmax_w", [2 * self.rep_size, 3])
        softmax_b = tf.get_variable("softmax_b", [3])
        # dim (batch_size, 3)
        self.logits = tf.matmul(prediction, softmax_w) + softmax_b

        # Index of the largest entry of each target row
        # (presumably the targets are one-hot -- TODO confirm).
        _, targets = tf.nn.top_k(self.targets)

        unit_weights = tf.ones([batch_size])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [targets], [unit_weights], 3)
        example_loss = tf.reduce_mean(loss)
        # Penalty term: mean of (remainder + number of iterations).
        step_cost = tf.reduce_mean(
            self.remainder + tf.cast(iterations, tf.float32))
        self.cost = example_loss + self.config.step_penalty * step_cost

        if self.config.embedding_reg and update_embeddings:
            embedding_penalty = tf.reduce_mean(tf.square(embedding))
            self.cost = self.cost + self.config.embedding_reg * embedding_penalty

        _, logit_max_index = tf.nn.top_k(self.logits)
        hits = tf.cast(tf.equal(logit_max_index, targets), tf.float32)
        self.accuracy = tf.reduce_mean(hits)

        if is_training:
            self.lr = tf.Variable(config.learning_rate, trainable=False)

            tvars = tf.trainable_variables()
            raw_grads = tf.gradients(self.cost, tvars)
            grads, _ = tf.clip_by_global_norm(raw_grads,
                                              self.config.max_grad_norm)

            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Пример #35
0
def model(words_size, embedding_size, oseq_len, source_len, simplified_len, defendant_nfilters, defendant_width,
          encoder_hidden, decoder_hidden, lstm_layer, batch_size, source_nfilters, source_width, is_train):
    """
    Assemble the encoder/decoder graph and return its tensors in a dict.

    The inputs and parameters come from ``construct_data``; they are passed
    through ``encoder_conv``, ``encoder_rnn`` and ``decoder_rnn``.  The cost
    is the mean of ``seq2seq.sequence_loss_by_example`` over the decoder
    outputs, and the predicted words are the arg-max over the last axis of
    the stacked decoder outputs.

    All size/length arguments are forwarded unchanged to the helper
    functions above; ``is_train`` is forwarded to ``encoder_conv`` and
    ``decoder_rnn``.

    :return: dict with the keys 'outputs', 'embedding', 'cost',
        'sample_rate', 'words_prediction', 'source', 'defendant',
        'defendant_length', 'label', 'decoder_inputs', 'loss_weights' and
        'keep_prob'.
    """
    tensors = construct_data(
        words_size=words_size,
        embedding_size=embedding_size,
        source_len=source_len,
        simplified_len=simplified_len,
        oseq_len=oseq_len,
        encoder_hidden=encoder_hidden,
        decoder_hidden=decoder_hidden,
        source_nfilters=source_nfilters,
        source_width=source_width,
        defendant_nfilters=defendant_nfilters,
        defendant_width=defendant_width)

    # Pull everything needed out of the dict in one pass (same key order
    # as the individual look-ups would have).
    wanted_keys = ('embedding', 'conv_args',
                   'weigth_generation', 'bias_generation',
                   'weigth_copy', 'bias_copy',
                   'source', 'defendant', 'defendant_length',
                   'label', 'decoder_inputs', 'loss_weights',
                   'keep_prob', 'sample_rate')
    (embedding, conv_args,
     weigth_generation, bias_generation,
     weigth_copy, bias_copy,
     source, defendant, defendant_length,
     label, decoder_inputs, loss_weights,
     keep_prob, sample_rate) = [tensors[key] for key in wanted_keys]

    conv_encoder = encoder_conv(
        source=source,
        defendant=defendant,
        conv_args=conv_args,
        keep_prob=keep_prob,
        embedding=embedding,
        is_train=is_train)

    rnn_encoder = encoder_rnn(
        defendant=defendant,
        defendant_length=defendant_length,
        encoder_hidden=encoder_hidden,
        keep_prob=keep_prob,
        batch_size=batch_size,
        embedding=embedding)

    # The decoder state is returned as well but not used here.
    decoder_outputs, _decoder_state = decoder_rnn(
        conv_encoder=conv_encoder,
        rnn_encoder=rnn_encoder,
        defendant=defendant,
        decoder_inputs=decoder_inputs,
        decoder_hidden=decoder_hidden,
        weigth_generation=weigth_generation,
        weigth_copy=weigth_copy,
        bias_generation=bias_generation,
        bias_copy=bias_copy,
        n_steps=oseq_len,
        batch_size=batch_size,
        lstm_layer=lstm_layer,
        keep_prob=keep_prob,
        embedding=embedding,
        sample_rate=sample_rate,
        is_train=is_train)

    # Transpose labels and weights with perm [1, 0], then unpack them into
    # per-step lists as sequence_loss_by_example expects.
    step_targets = tf.unpack(tf.transpose(label, [1, 0]))
    weight_tensor = tf.convert_to_tensor(loss_weights, dtype=tf.float32)
    step_weights = tf.unpack(tf.transpose(weight_tensor, [1, 0]))
    per_example_loss = seq2seq.sequence_loss_by_example(
        logits=decoder_outputs,
        targets=step_targets,
        weights=step_weights)
    cost = tf.reduce_mean(per_example_loss)

    # Stack the per-step outputs, swap the first two axes and take the
    # arg-max over the last axis.
    stacked_outputs = tf.transpose(tf.pack(decoder_outputs), [1, 0, 2])
    words_prediction = tf.argmax(stacked_outputs, 2)

    print('build model ')

    result = {}
    result['outputs'] = decoder_outputs
    result['embedding'] = embedding
    result['cost'] = cost
    result['sample_rate'] = sample_rate
    result['words_prediction'] = words_prediction
    result['source'] = source
    result['defendant'] = defendant
    result['defendant_length'] = defendant_length
    result['label'] = label
    result['decoder_inputs'] = decoder_inputs
    result['loss_weights'] = loss_weights
    result['keep_prob'] = keep_prob
    return result
Пример #36
0
    def __init__(self,
                 embedding,
                 max_length,
                 initial_state,
                 attention_states,
                 cell,
                 num_samples=512,
                 feed_previous=False,
                 update_embedding_for_previous=True,
                 dtype=dtypes.float32,
                 scope=None,
                 initial_state_attention=False,
                 **kwargs):
        """
        Build an embedding attention decoder with its loss and decoded output.

        :param embedding: embedding matrix; its first dimension is taken as
            the number of output symbols.
        :param max_length: maximum sequence length; 2 is added for _GO/_EOS.
        :param initial_state: initial decoder state, forwarded to
            ``attention_decoder``.
        :param attention_states: states to attend over, forwarded to
            ``attention_decoder``.
        :param cell: decoder cell. Stored unwrapped as ``self.cell``; when no
            sampled softmax is used it is wrapped in an
            ``OutputProjectionWrapper`` for decoding.
        :param num_samples: number of samples for the sampled softmax loss,
            which is used only when ``0 < num_samples < num_symbols``.
        :param feed_previous: when true, a loop function built by
            ``self._extract_argmax_and_embed`` is passed to the decoder.
        :param update_embedding_for_previous: forwarded to
            ``self._extract_argmax_and_embed``.
        :param dtype: dtype the output projection is converted to for the
            shape checks.
        :param scope: variable scope; defaults to
            "embedding_attention_decoder".
        :param initial_state_attention: forwarded to ``attention_decoder``.
        :param kwargs: optional ``lengths``, ``inputs`` and ``weights``
            tensors; placeholders are created for those not supplied.
        """
        # account for _GO and _EOS
        self.max_length = max_length + 2

        # Create the default placeholders lazily.  ``kwargs.get(key, default)``
        # evaluates its default eagerly, which would add orphan placeholder
        # nodes to the graph whenever the caller supplies its own tensors.
        if 'lengths' in kwargs:
            self.lengths = kwargs['lengths']
        else:
            self.lengths = tf.placeholder(
                tf.int32, shape=[None], name="decoder_lengths")

        if 'inputs' in kwargs:
            self.inputs = kwargs['inputs']
        else:
            self.inputs = [
                tf.placeholder(
                    tf.int32, shape=[None], name="decoder_input{0}".format(i))
                for i in range(self.max_length)
            ]

        if 'weights' in kwargs:
            self.weights = kwargs['weights']
        else:
            self.weights = [
                tf.placeholder(
                    tf.float32, shape=[None],
                    name="decoder_weight{0}".format(i))
                for i in range(self.max_length)
            ]

        # Targets are the inputs shifted left by one step, padded with zeros
        # for the last step.  Built once and reused for the loss; padding
        # from ``inputs[-1]`` also works for a single-step input list.
        self.targets = list(self.inputs[1:])
        self.targets.append(tf.zeros_like(self.inputs[-1]))

        num_symbols = embedding.get_shape()[0].value
        output_projection = None
        loss_function = None
        self.cell = cell
        self.feed_previous = feed_previous

        if num_samples > 0 and num_samples < num_symbols:
            # Sampled softmax: keep an explicit projection and a loss
            # function that uses its transpose.
            with tf.device('/cpu:0'):
                w = tf.get_variable('proj_w', [cell.output_size, num_symbols])
                w_t = tf.transpose(w)
                b = tf.get_variable('proj_b', [num_symbols])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device('/cpu:0'):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                      num_samples, num_symbols)

            loss_function = sampled_loss

        if output_projection is None:
            # Full softmax: let the cell itself emit num_symbols logits.
            cell = rnn_cell.OutputProjectionWrapper(cell, num_symbols)
            output_size = num_symbols
        else:
            output_size = cell.output_size
            # Converted only to validate the projection shapes.
            proj_weights = ops.convert_to_tensor(output_projection[0],
                                                 dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with(
                [cell.output_size, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1],
                                                dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        with variable_scope.variable_scope(scope
                                           or "embedding_attention_decoder"):
            loop_function = self._extract_argmax_and_embed(
                embedding, output_projection,
                update_embedding_for_previous) if feed_previous else None

            emb_inp = [
                embedding_ops.embedding_lookup(embedding, i)
                for i in self.inputs
            ]
            self.outputs, self.state = attention_decoder(
                emb_inp,
                self.lengths,
                initial_state,
                attention_states,
                cell,
                output_size=output_size,
                loop_function=loop_function,
                initial_state_attention=initial_state_attention)

        # loss for each instance in batch
        self.instance_loss = sequence_loss_by_example(
            self.outputs,
            self.targets,
            self.weights,
            softmax_loss_function=loss_function)

        # aggregated average loss per instance for batch
        self.loss = tf.reduce_sum(self.instance_loss) / math_ops.cast(
            array_ops.shape(self.targets[0])[0], self.instance_loss.dtype)

        if output_projection is not None:
            # The raw outputs still have to be projected to symbol scores.
            self.projected_output = [
                tf.matmul(o, output_projection[0]) + output_projection[1]
                for o in self.outputs
            ]
            step_scores = self.projected_output
        else:
            step_scores = self.outputs

        # Arg-max symbol of every step, stacked along axis 0 (one row per
        # decoding step).  Transposing it directly avoids re-packing the
        # unpacked list.
        decoded_steps = tf.argmax(tf.pack(step_scores), 2)
        self.decoded_outputs = tf.unpack(decoded_steps)
        self.decoded_batch = tf.transpose(decoded_steps)
        # Sum of sign(id) per row, i.e. the count of symbols with id > 0.
        self.decoded_lenghts = tf.reduce_sum(tf.sign(self.decoded_batch), 1)
        # Correctly spelled alias; the misspelled name is kept for callers.
        self.decoded_lengths = self.decoded_lenghts