# Example #1
# 0
class LSTMLM_Model(LanguageModel):
    def load_own_data(self,
                      filename,
                      filename2,
                      filename3,
                      debug=False,
                      encoding='utf-8'):
        """Build the vocabulary and encode the train/dev/test corpora.

        The vocabulary is constructed from the training file only. Every item
        yielded by ``get_datafile`` for each of the three files is then mapped
        through ``self.vocab.encode`` and stored as an ``int32`` array in
        ``self.encoded_train``, ``self.encoded_valid`` and
        ``self.encoded_test`` respectively.

        Args:
            filename: Training corpus; also the source of the vocabulary.
            filename2: Validation corpus.
            filename3: Test corpus.
            debug: When true, keep only the first 1024 ids of each split.
            encoding: Text encoding forwarded to ``get_datafile`` for every
                read, including the vocabulary pass.
        """
        self.vocab = Vocab()
        # The vocabulary pass must decode the file exactly like the encoding
        # passes below; otherwise tokens containing non-ASCII characters can
        # differ between the two reads and fail to match vocabulary entries.
        self.vocab.construct(get_datafile(filename, encoding=encoding))

        def encode_file(path):
            # Encode one corpus file into a flat int32 id array.
            return np.array(
                [
                    self.vocab.encode(word)
                    for word in get_datafile(path, encoding=encoding)
                ],
                dtype=np.int32)

        self.encoded_train = encode_file(filename)
        self.encoded_valid = encode_file(filename2)
        self.encoded_test = encode_file(filename3)

        if debug:
            # Truncate every split so a debug run finishes quickly.
            num_debug = 1024
            self.encoded_train = self.encoded_train[:num_debug]
            self.encoded_valid = self.encoded_valid[:num_debug]
            self.encoded_test = self.encoded_test[:num_debug]

    def add_placeholders(self):
        """Create the graph inputs and store them as attributes.

        Sets ``input_placeholder`` and ``labels_placeholder`` (int32,
        ``[None, num_steps]``), ``dropout_placeholder`` (float32, no declared
        shape) and ``_new_lr`` (float32 scalar).
        """
        seq_shape = [None, self.config.num_steps]
        self.input_placeholder = tf.placeholder(
            dtype=tf.int32, shape=seq_shape, name='Input')
        self.labels_placeholder = tf.placeholder(
            dtype=tf.int32, shape=seq_shape, name='Target')
        self.dropout_placeholder = tf.placeholder(
            dtype=tf.float32, name='Dropout')
        self._new_lr = tf.placeholder(
            dtype=tf.float32, shape=[], name="new_learning_rate")

    def add_projection(self, rnn_outputs):
        """Project each RNN output onto the vocabulary.

        NOTE: a second ``add_projection`` is defined further down in this
        class body. Being the later definition, it is the one bound on the
        class, so this version is not reached through ``self.add_projection``.

        Args:
            rnn_outputs: Iterable of tensors, each multiplied by the
                ``[hidden_size, len(vocab)]`` ``Matrix`` variable.

        Returns:
            A list holding ``matmul(o, Matrix) + Bias`` for every element.
        """
        vocab_size = len(self.vocab)
        with tf.variable_scope('Projection'):
            weights = tf.get_variable(
                'Matrix', [self.config.hidden_size, vocab_size])
            bias = tf.get_variable('Bias', [vocab_size])
            projected = []
            for rnn_out in rnn_outputs:
                projected.append(tf.matmul(rnn_out, weights) + bias)
        return projected

    def add_embedding(self):
        """Look up embeddings for the ids in ``self.input_placeholder``.

        The trainable ``Embedding`` variable of shape
        ``[len(vocab), embed_size]`` and the lookup op are both created under
        a ``/cpu:0`` device scope.

        Returns:
            The result of ``tf.nn.embedding_lookup`` on the input placeholder.
        """
        table_shape = [len(self.vocab), self.config.embed_size]
        with tf.device('/cpu:0'):
            table = tf.get_variable('Embedding', table_shape, trainable=True)
            embedded = tf.nn.embedding_lookup(table, self.input_placeholder)
        return embedded

    def add_projection(self, lstm_output):
        """Map LSTM outputs to per-step vocabulary logits.

        Args:
            lstm_output: Tensor multiplied by the ``[hidden_size, len(vocab)]``
                ``softmax_w`` variable (``add_model`` returns its output
                reshaped to ``[-1, hidden_size]``).

        Returns:
            Logits reshaped to ``[batch_size, num_steps, len(vocab)]``.
        """
        hidden = self.config.hidden_size
        n_words = len(self.vocab)
        with tf.variable_scope('Projection'):
            weights = tf.get_variable(
                "softmax_w", [hidden, n_words], dtype=data_type())
            bias = tf.get_variable("softmax_b", [n_words], dtype=data_type())
            flat_logits = tf.nn.xw_plus_b(lstm_output, weights, bias)
            # The sequence loss takes 3-D logits, so restore the step axis.
            target_shape = [
                self.config.batch_size, self.config.num_steps, n_words
            ]
            logits = tf.reshape(flat_logits, target_shape)
        return logits

    def add_loss_op(self, output):
        """Build the sequence loss and register it under ``'total_loss'``.

        ``tf.contrib.seq2seq.sequence_loss`` is called with all-ones weights,
        averaging across the batch but not across timesteps; its result is
        summed into ``self.cost``.

        Args:
            output: Logits passed as the first argument of ``sequence_loss``.

        Returns:
            ``tf.add_n`` over everything in the ``'total_loss'`` collection,
            which includes ``self.cost``.
        """
        weights = tf.ones(
            [self.config.batch_size, self.config.num_steps],
            dtype=data_type())
        seq_loss = tf.contrib.seq2seq.sequence_loss(
            output,
            self.labels_placeholder,
            weights,
            average_across_timesteps=False,
            average_across_batch=True)
        self.cost = tf.reduce_sum(seq_loss)
        tf.add_to_collection('total_loss', self.cost)
        return tf.add_n(tf.get_collection('total_loss'))

    def assign_lr(self, session, lr_value):
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def add_training_op(self):
        """Create the clipped-gradient SGD update and the learning-rate assign.

        Reads ``self.cost`` and ``self._new_lr``, so both must already exist.
        Sets ``self._lr`` (non-trainable variable, initial value 0.0) and
        ``self._lr_update`` (assigns ``self._new_lr`` to ``self._lr``).

        Returns:
            The op returned by ``apply_gradients``.
        """
        self._lr = tf.Variable(0.0, trainable=False)
        variables = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, variables)
        # Clip by global norm before the optimizer applies the gradients.
        clipped_grads, _ = tf.clip_by_global_norm(
            raw_grads, self.config.max_grad_norm)
        sgd = tf.train.GradientDescentOptimizer(self._lr)
        step_counter = tf.train.get_or_create_global_step()
        train_op = sgd.apply_gradients(
            zip(clipped_grads, variables), global_step=step_counter)
        self._lr_update = tf.assign(self._lr, self._new_lr)
        return train_op

    def _get_lstm_cell(self, is_training):
        """Return a ``BasicLSTMCell`` with ``config.hidden_size`` units.

        The cell is created with ``forget_bias=0.0`` and tuple state; its
        ``reuse`` argument is the negation of ``is_training``.
        """
        cell_kwargs = {
            'forget_bias': 0.0,
            'state_is_tuple': True,
            'reuse': not is_training,
        }
        return tf.contrib.rnn.BasicLSTMCell(self.config.hidden_size,
                                            **cell_kwargs)

    def add_model(self, inputs, is_training):
        """Build the stacked LSTM and unroll it over ``config.num_steps``.

        Sets ``self.initial_state`` (the cell's zero state for
        ``config.batch_size``) and ``self.final_state`` (the state after the
        last unrolled step).

        Args:
            inputs: Tensor sliced as ``inputs[:, t, :]`` for every time step
                (presumably ``[batch, num_steps, embed_size]`` coming from
                ``add_embedding`` - confirm against the caller).
            is_training: When truthy and ``config.dropout < 1``, dropout is
                applied to the inputs and to each layer's output. It is also
                forwarded to ``_get_lstm_cell``.

        Returns:
            The per-step cell outputs concatenated along axis 1 and reshaped
            to ``[-1, hidden_size]``.
        """
        print(inputs.shape)
        apply_dropout = is_training and self.config.dropout < 1

        def build_layer():
            # One LSTM layer, wrapped with output dropout when enabled.
            layer = self._get_lstm_cell(is_training)
            if apply_dropout:
                layer = tf.contrib.rnn.DropoutWrapper(
                    layer, output_keep_prob=self.config.dropout)
            return layer

        with tf.variable_scope('InputDropout'):
            if apply_dropout:
                inputs = tf.nn.dropout(inputs, self.config.dropout)

        with tf.variable_scope('LSTMMODEL'):
            layers = [build_layer() for _ in range(self.config.num_layers)]
            stacked = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
            self.initial_state = stacked.zero_state(self.config.batch_size,
                                                    data_type())
            state = self.initial_state
            step_outputs = []
            with tf.variable_scope("RNNV"):
                for t in range(self.config.num_steps):
                    # After the first step, put the scope in reuse mode so
                    # later steps pick up the existing variables.
                    if t > 0:
                        tf.get_variable_scope().reuse_variables()
                    step_out, state = stacked(inputs[:, t, :], state)
                    step_outputs.append(step_out)
            flat = tf.reshape(tf.concat(step_outputs, 1),
                              [-1, self.config.hidden_size])
            self.final_state = state
        return flat

    def run_epoch(self, session, data, train_op=None, verbose=10):
        """Run one pass over ``data`` and return its perplexity.

        Args:
            session: Session used to run the graph.
            data: Encoded corpus handed to ``ptb_iterator`` together with
                ``config.batch_size`` and ``config.num_steps``.
            train_op: Op run alongside the cost on every batch. When ``None``,
                a ``tf.no_op()`` is used and the fed dropout value is 1.
            verbose: Write the running perplexity to stdout every ``verbose``
                steps; a falsy value disables progress output.

        Returns:
            ``exp(summed cost / (number of batches * num_steps))``.

        Raises:
            ZeroDivisionError: If ``ptb_iterator`` yields no batches.
        """
        config = self.config
        dp = config.dropout
        # Test for None explicitly: truth-testing a TF graph object is
        # fragile (a tf.Tensor raises TypeError when used as a bool).
        if train_op is None:
            train_op = tf.no_op()
            dp = 1
        # First pass only counts the batches for the progress display.
        total_steps = sum(
            1 for _ in ptb_iterator(data, config.batch_size, config.num_steps))
        costs = 0.0
        iters = 0
        batches = ptb_iterator(data, config.batch_size, config.num_steps)
        for step, (x, y) in enumerate(batches):
            # NOTE: self.initial_state is not fed, so the RNN state is not
            # carried over from one batch to the next.
            feed = {
                self.input_placeholder: x,
                self.labels_placeholder: y,
                self.dropout_placeholder: dp
            }
            # Only the cost is used below, so nothing else is fetched.
            cost, _ = session.run([self.cost, train_op], feed_dict=feed)
            costs += cost
            iters += config.num_steps
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(costs / iters)))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(costs / iters)

    def assign_lr(self, session, lr_value):
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def __init__(self, config):
        """Load the corpora from ``./data/`` and assemble the whole graph.

        Construction order: data and vocabulary, placeholders, embedding,
        LSTM, projection, softmax predictions, loss, training op.

        Args:
            config: Settings object; this method reads ``is_training``,
                ``batch_size`` and ``num_steps`` from it, and the builder
                methods it calls read further fields.
        """
        self.config = config
        data_dir = "./data/"
        self.load_own_data(filename=data_dir + "train_data",
                           filename2=data_dir + "dev_data",
                           filename3=data_dir + "test_data",
                           debug=False,
                           encoding='Latin-1')
        self.add_placeholders()

        self.inputs = self.add_embedding()
        self.lstm_outputs = self.add_model(self.inputs,
                                           self.config.is_training)
        self.outputs = self.add_projection(self.lstm_outputs)

        # Softmax is taken in float64 on flattened logits, then the result is
        # reshaped back and transposed so the step axis comes first.
        batch = self.config.batch_size
        steps = self.config.num_steps
        n_words = len(self.vocab)
        flat_logits = tf.reshape(self.outputs, [batch * steps, n_words])
        flat_probs = tf.nn.softmax(tf.cast(flat_logits, tf.float64))
        probs = tf.reshape(flat_probs, [batch, steps, n_words])
        self.predictions = tf.transpose(probs, [1, 0, 2])

        self.calculate_loss = self.add_loss_op(self.outputs)
        self.train_step = self.add_training_op()