Example #1
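
# Imports required by this excerpt. The homes of tr and Vocab are assumptions
# (in the CS224d starter code they live in tree.py and utils.py), and the
# RESET_AFTER value below is illustrative.
import itertools
import math
import os
import random
import shutil
import sys

import numpy as np
import tensorflow as tf

import tree as tr        # assumed module providing simplified_data()
from utils import Vocab  # assumed home of the Vocab class

RESET_AFTER = 50  # rebuild the TF graph after this many trees
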
class RNN_Model():
    def load_data(self, LOAD_DATA=False):
        """Loads train/dev/test data and builds the vocabulary.

        If LOAD_DATA is True, only the Vocab class is initialized (it carries
        the pretrained embedding matrix); otherwise the dataset is loaded and
        the vocabulary is built from the training sentences.
        """
        if LOAD_DATA:
            # only initialize the Vocab class; it holds the embedding matrix
            self.vocab = Vocab()
        else:
            self.train_data, self.dev_data, self.test_data = tr.simplified_data(
                600, 40)
            # use tr.simplified_data(2000, 500) for a larger data split
            # build vocab from training data
            self.vocab = Vocab()
            train_sents = [t.get_words() for t in self.train_data]
            self.vocab.construct(
                list(itertools.chain.from_iterable(train_sents)))

    def inference(self, tree, predict_only_root=False):
        """For a given tree build the RNN models computation graph up to where it
            may be used for inference.
        Args:
            tree: a Tree object on which to build the computation graph for the RNN
        Returns:
            softmax_linear: Output tensor with the computed logits.
        """
        node_tensors = self.add_model(tree.root)
        if predict_only_root:
            node_tensors = node_tensors[tree.root]
        else:
            node_tensors = [
                tensor for node, tensor in node_tensors.iteritems()
                if node.label != 2
            ]
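            # in the TF 0.x API tf.concat takes the axis first; this stacks
            # the per-node (1, embed_size) tensors into (num_nodes, embed_size)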
            node_tensors = tf.concat(0, node_tensors)
        return self.add_projections(node_tensors)

    def add_model_vars(self):
        '''
        Your model contains the following parameters:
            embedding:  tensor(vocab_size, embed_size)
            W1:         tensor(2 * embed_size, embed_size)
            b1:         tensor(1, embed_size)
            U:          tensor(embed_size, output_size)
            bs:         tensor(1, output_size)
        Hint: Add the tensorflow variables to the graph here and *reuse* them while building
              the computation graphs for composition and projection for each tree
        Hint: Use a variable_scope "Composition" for the composition layer, and
              "Projection" for the linear transformations preceding the softmax.
        '''
        with tf.variable_scope('Composition'):
            ### YOUR CODE HERE
            # use the pretrained word embedding stored in self.vocab; it is
            # registered as a non-trainable variable so the word vectors stay
            # fixed during training
            word2vec_embedding = self.vocab.emb_numpymatrix
            E = tf.cast(tf.constant(word2vec_embedding), tf.float32)
            tf.get_variable("Word2vec_E", initializer=E, trainable=False)
            tf.get_variable(
                'W1',
                shape=[2 * self.config.embed_size, self.config.embed_size])
            tf.get_variable('b1', shape=[1, self.config.embed_size])

            ### END YOUR CODE
        with tf.variable_scope('Projection'):
            ### YOUR CODE HERE
            tf.get_variable(
                'U', shape=[self.config.embed_size, self.config.label_size])
            tf.get_variable('bs', shape=[1, self.config.label_size])
            ### END YOUR CODE

    def add_model(self, node):
        """Recursively build the model to compute the phrase embeddings in the tree

        Hint: Refer to tree.py and vocab.py before you start. Refer to
              the model's vocab with self.vocab
        Hint: Reuse the "Composition" variable_scope here
        Hint: Store a node's vector representation in node.tensor so it can be
              used by its parent
        Hint: If node is a leaf node, its vector representation is just that of the
              word vector (see tf.gather()).
        Args:
            node: a Node object
        Returns:
            node_tensors: Dict: key = Node, value = tensor(1, embed_size)
        """
        with tf.variable_scope('Composition', reuse=True):
            ### YOUR CODE HERE
            embedding = tf.get_variable('Word2vec_E')
            W1 = tf.get_variable('W1')
            b1 = tf.get_variable('b1')
            ### END YOUR CODE

        node_tensors = dict()
        curr_node_tensor = None
        if node.isLeaf:
            ### YOUR CODE HERE
            # look up the word's row in the embedding matrix, falling back to
            # the "UNK" token for out-of-vocabulary words (assuming the lookup
            # raises KeyError for unknown words)
            try:
                idx = self.vocab.emb_wordtoindex(node.word)
            except KeyError:
                idx = self.vocab.emb_wordtoindex("UNK")
            # tf.gather selects row idx; expand_dims reshapes it to (1, embed_size)
            h = tf.gather(embedding, indices=idx)
            curr_node_tensor = tf.expand_dims(h, 0)
            ### END YOUR CODE
        else:
            node_tensors.update(self.add_model(node.left))
            node_tensors.update(self.add_model(node.right))
            ### YOUR CODE HERE
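            # concatenate the children's (1, embed_size) vectors along axis 1
            # to get (1, 2 * embed_size), then compose them with a ReLU layer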
            HlHr = tf.concat(
                1, [node_tensors[node.left], node_tensors[node.right]])
            curr_node_tensor = tf.nn.relu(tf.matmul(HlHr, W1) + b1)
            ### END YOUR CODE
        node_tensors[node] = curr_node_tensor
        return node_tensors

    def add_projections(self, node_tensors):
        """Add projections to the composition vectors to compute the raw sentiment scores

        Hint: Reuse the "Projection" variable_scope here
        Args:
            node_tensors: tensor(?, embed_size)
        Returns:
            output: tensor(?, label_size)
        """
        logits = None
        ### YOUR CODE HERE
        with tf.variable_scope('Projection', reuse=True):
            U = tf.get_variable('U')
            bs = tf.get_variable('bs')
            logits = tf.matmul(node_tensors, U) + bs
        ### END YOUR CODE
        return logits

    def loss(self, logits, labels):
        """Adds loss ops to the computational graph.

        Hint: Use sparse_softmax_cross_entropy_with_logits
        Hint: Remember to add l2_loss (see tf.nn.l2_loss)
        Args:
            logits: tensor(num_nodes, output_size)
            labels: python list, len = num_nodes
        Returns:
            loss: tensor 0-D
        """
        loss = None
        # YOUR CODE HERE
        with tf.variable_scope('Composition', reuse=True):
            W1 = tf.get_variable('W1')
        with tf.variable_scope('Projection', reuse=True):
            U = tf.get_variable('U')

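        # total loss = summed per-node cross-entropy + self.config.l2 * l2loss,
        # where tf.nn.l2_loss(t) computes sum(t ** 2) / 2; the biases are left
        # unregularized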
        l2loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U)

        cross_entropy = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels))
        loss = cross_entropy + self.config.l2 * l2loss

        # END YOUR CODE
        return loss

    def training(self, loss):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable variables.
        The Op returned by this function is what must be passed to the
        `sess.run()` call to cause the model to train. See

        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

        for more information.

        Hint: Use tf.train.GradientDescentOptimizer for this model.
                Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: tensor 0-D
        Returns:
            train_op: tensorflow op for training.
        """
        train_op = None
        # YOUR CODE HERE
        optimizer = tf.train.GradientDescentOptimizer(self.config.lr)
        train_op = optimizer.minimize(loss)
        # END YOUR CODE
        return train_op

    def predictions(self, y):
        """Returns class predictions (argmax over labels) from the raw scores.

        Args:
            y: tensor(?, label_size)
        Returns:
            predictions: tensor(?,)
        """
        predictions = None
        # YOUR CODE HERE
        predictions = tf.argmax(y, 1)
        # END YOUR CODE
        return predictions

    def __init__(self, config, LOAD_DATA=True):
        self.config = config
        self.load_data(LOAD_DATA=LOAD_DATA)

    def predict(self, trees, weights_path, get_loss=False):
        """Make predictions from the provided model."""
        results = []
        losses = []
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
        config = tf.ConfigProto(gpu_options=gpu_options)
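        # the graph is rebuilt every RESET_AFTER trees: each tree adds fresh
        # ops to the graph, so periodically starting from an empty graph (and
        # restoring weights from disk) keeps memory use bounded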
        for i in xrange(int(math.ceil(len(trees) / float(RESET_AFTER)))):
            with tf.Graph().as_default(), tf.Session(config=config) as sess:
                self.add_model_vars()
                saver = tf.train.Saver()
                saver.restore(sess, weights_path)
                for tree in trees[i * RESET_AFTER:(i + 1) * RESET_AFTER]:
                    if tree == "empty":  # for test trees that have multiple children
                        # fall back to a uniform random 0/1 prediction
                        root_prediction = int(random.getrandbits(1))
                        results.append(root_prediction)
                        continue

                    logits = self.inference(tree, True)
                    predictions = self.predictions(logits)
                    root_prediction = sess.run(predictions)[0]
                    if get_loss:
                        root_label = tree.label
                        loss = sess.run(self.loss(logits, [root_label]))
                        losses.append(loss)
                    results.append(root_prediction)
        return results, losses

    def run_epoch(self, new_model=False, verbose=True):
        step = 0
        loss_history = []
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
        config = tf.ConfigProto(gpu_options=gpu_options)
        while step < len(self.train_data):
            with tf.Graph().as_default(), tf.Session(config=config) as sess:
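                # variables do not survive the per-chunk graph reset, so the
                # weights are restored from the previous checkpoint at the
                # start of each chunk and saved again at the end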
                self.add_model_vars()
                if new_model:
                    init = tf.initialize_all_variables()
                    sess.run(init)
                else:
                    saver = tf.train.Saver()
                    saver.restore(sess,
                                  './weights/%s.temp' % self.config.model_name)
                for _ in xrange(RESET_AFTER):
                    if step >= len(self.train_data):
                        break
                    tree = self.train_data[step]
                    logits = self.inference(tree, True)
                    label = tree.label
                    loss = self.loss(logits, [label])
                    train_op = self.training(loss)
                    loss, _ = sess.run([loss, train_op])
                    loss_history.append(loss)
                    if verbose:
                        sys.stdout.write('\r{} / {} :    loss = {}'.format(
                            step, len(self.train_data), np.mean(loss_history)))
                        sys.stdout.flush()
                    step += 1
                saver = tf.train.Saver()
                if not os.path.exists("./weights"):
                    os.makedirs("./weights")
                saver.save(sess, './weights/%s.temp' % self.config.model_name)
        train_preds, _ = self.predict(
            self.train_data, './weights/%s.temp' % self.config.model_name)
        val_preds, val_losses = self.predict(self.dev_data,
                                             './weights/%s.temp' %
                                             self.config.model_name,
                                             get_loss=True)
        train_labels = [t.label for t in self.train_data]
        val_labels = [t.label for t in self.dev_data]
        train_acc = np.equal(train_preds, train_labels).mean()
        val_acc = np.equal(val_preds, val_labels).mean()

        print
        print 'Training acc (only root node): {}'.format(train_acc)
        print 'Validation acc (only root node): {}'.format(val_acc)
        print self.make_conf(train_labels, train_preds)
        print self.make_conf(val_labels, val_preds)
        return train_acc, val_acc, loss_history, np.mean(val_losses)

    def train(self, verbose=True):
        complete_loss_history = []
        train_acc_history = []
        val_acc_history = []
        prev_epoch_loss = float('inf')
        best_val_loss = float('inf')
        best_val_epoch = 0
        stopped = -1
        for epoch in xrange(self.config.max_epochs):
            print 'epoch %d' % epoch
            if epoch == 0:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch(
                    new_model=True)
            else:
                train_acc, val_acc, loss_history, val_loss = self.run_epoch()
            complete_loss_history.extend(loss_history)
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            # lr annealing: decay the learning rate when the epoch loss fails
            # to drop below anneal_threshold times the previous epoch's loss
            epoch_loss = np.mean(loss_history)
            if epoch_loss > prev_epoch_loss * self.config.anneal_threshold:
                self.config.lr /= self.config.anneal_by
                print 'annealed lr to %f' % self.config.lr
            prev_epoch_loss = epoch_loss

            # save the weights if the model has improved on val_loss
            if val_loss < best_val_loss:
                shutil.copyfile('./weights/%s.temp' % self.config.model_name,
                                './weights/%s' % self.config.model_name)
                best_val_loss = val_loss
                best_val_epoch = epoch

            # stop if the model has not improved for a while
            if epoch - best_val_epoch > self.config.early_stopping:
                stopped = epoch
                #break

        if verbose:
            sys.stdout.write('\r')
            sys.stdout.flush()

        return {
            'loss_history': complete_loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def make_conf(self, labels, predictions):
        confmat = np.zeros([2, 2])
        for l, p in itertools.izip(labels, predictions):
            confmat[l, p] += 1
        return confmat
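
# A minimal usage sketch, not part of the original source: it assumes a plain
# Config object carrying the hyperparameters the model reads above
# (embed_size, label_size, lr, l2, model_name, max_epochs, anneal_threshold,
# anneal_by, early_stopping). The values below are illustrative only.
class Config(object):
    embed_size = 50
    label_size = 2
    lr = 0.01
    l2 = 0.02
    model_name = 'rnn_example'
    max_epochs = 10
    anneal_threshold = 0.99
    anneal_by = 1.5
    early_stopping = 2


if __name__ == "__main__":
    # LOAD_DATA=False loads the dataset and builds the vocabulary (see load_data)
    model = RNN_Model(Config(), LOAD_DATA=False)
    stats = model.train(verbose=True)
    print 'best validation accuracy: {}'.format(max(stats['val_acc_history']))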