Example #1
    def eval_batches(self, loss_fn, sess, eval_set, num_batches):
        """Evaluate the loss on a number of given minibatches of a dataset.

        Args:
            loss_fn: loss function
            sess: tf.Session()
            eval_set: full dataset, as passed to run_epoch
            num_batches: number of batches to evaluate
        Returns:
            loss: mean loss over the evaluated batches (a scalar)
        """
        losses = []
        for i, (inputs_batch, outputs_batch) in enumerate(
                minibatches(eval_set, self.batch_size)):
            if i >= num_batches:
                break
            feed = {
                self.image_in:
                inputs_batch,
                self.emotion_label:
                outputs_batch,
                self.gaussian_in:
                np.random.normal(size=(len(inputs_batch), self.style_dim))
            }
            loss = self.eval_on_batch(loss_fn, feed, sess)
            losses.append(loss)
        return np.mean(losses)
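
Every example on this page calls a `minibatches` helper that is not shown. The generator below is only a rough sketch of the single-dataset call pattern used above (`minibatches(eval_set, batch_size)`), where the dataset is a sequence of equally long parallel arrays; the variants that take inputs and labels as separate arguments would need a small extension, and all implementation details here are assumptions rather than the original utility.

import numpy as np

def minibatches(data, batch_size, shuffle=True):
    # `data` is assumed to be a sequence of equally long arrays,
    # e.g. [inputs, labels]; each yielded item is a tuple holding one
    # batch-sized slice per array, matching the
    # `(inputs_batch, outputs_batch)` unpacking in the examples.
    size = len(data[0])
    indices = np.arange(size)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, size, batch_size):
        batch_idx = indices[start:start + batch_size]
        yield tuple(np.asarray(d)[batch_idx] for d in data)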
Example #2
    def predict(self, sess, dev, labels):
        predicted_sentences = []

        dev_processed, labels_processed = self.preprocess_sequence_data(
            dev, labels)
        prog = Progbar(target=1 +
                       int(len(dev_processed) / self.config.batch_size))
        # predict in batches
        for i, (enc_batch, dec_batch) in enumerate(
                minibatches(dev_processed,
                            labels_processed,
                            self.config.batch_size,
                            shuffle=False)):
            #print enc_batch.shape, dec_batch.shape
            feed = self.create_feed_dict(enc_batch, dec_batch)

            outputs = sess.run(self.test_op, feed_dict=feed)
            for j in range(outputs.shape[0]):
                sentence = [self.labels_vocab.id2tok[k] for k in outputs[j, :]]
                if "</s>" in sentence:
                    sentence = sentence[:sentence.index("</s>")]
                predicted = " ".join(sentence)
                print(predicted)
                predicted_sentences.append(predicted)
            prog.update(i + 1)

        return predicted_sentences
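
The `Progbar` progress bar used throughout these examples is likewise an external helper (its interface matches the Keras-style progress bar). The stand-in below is a minimal sketch of only the calls the examples rely on: construct with a `target` step count and call `update(current, values)`, where `values` is an optional list of `(name, value)` pairs; the real class also does timing and nicer rendering.

import sys

class Progbar(object):
    # Minimal stand-in matching the calls above: Progbar(target=N) and
    # prog.update(i + 1, [("train loss", loss)]).
    def __init__(self, target):
        self.target = target

    def update(self, current, values=None):
        metrics = ", ".join(
            "{}: {:.4f}".format(name, float(val))
            for name, val in (values or []))
        sys.stdout.write("\r{}/{} {}".format(current, self.target, metrics))
        sys.stdout.flush()
        if current >= self.target:
            sys.stdout.write("\n")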
Example #3
    def predict(self, sess, dev):
        predicted_sentences = []

        dev_processed, labels_processed = self.preprocess_sequence_data(
            dev[0], dev[1])
        prog = Progbar(target=int(len(dev_processed) / self.config.batch_size))
        # predict in batches
        tot_loss = 0
        for i, (enc_batch, dec_batch) in enumerate(
                minibatches(dev_processed, labels_processed,
                            self.config.batch_size, False)):
            #print enc_batch.shape, dec_batch.shape
            feed = self.create_feed_dict(enc_batch, dec_batch)
            if self.beam_search is not None:
                outputs = sess.run(self.test_op, feed_dict=feed)
            else:
                outputs, loss = sess.run([self.test_op, self.predict_loss],
                                         feed_dict=feed)
                #loss = 0
                #outputs = sess.run(self.test_op, feed_dict = feed)
                tot_loss += loss
            for j in range(outputs.shape[0]):
                predicted = self.construct_sentence(
                    outputs[j, :], dev[2][(i * self.config.batch_size) + j])
                predicted_sentences.append(predicted)
                #print predicted
            if self.beam_search is not None:
                prog.update(i + 1)
            else:
                prog.update(i + 1, [("test loss", tot_loss / (i + 1))])
        print ""
        return predicted_sentences
Example #4
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw, epoch, last_epoch):
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        curr_loss = 0.
        num_encountered = 0
        for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            curr_loss += loss
            prog.update(i + 1, [("train loss", loss)])
            if self.report: self.report.log_train_loss(loss)
            num_encountered += 1
        # print curr_loss/num_encountered
        losses.append(curr_loss/num_encountered)
        epochs.append(epoch+1)
        print("")

        logger.info("Evaluating on development data")
        token_cm, metrics = self.evaluate(sess, dev_set, dev_set_raw)
        if last_epoch: 
            self.outputConfusionMatrix(token_cm.as_data())
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Accuracy: %.2f", metrics[1])
        logger.info("Error: %.2f", metrics[0])

        return metrics[0], metrics[1]
Example #5
 def run_epoch(self,
               tf_ops,
               loss_fns,
               sess,
               train_examples,
               dev_set,
               batch_size,
               logfile=None):
     for i, (inputs_batch, outputs_batch) in enumerate(
             minibatches(train_examples, batch_size)):
         feed = {
             self.image_in:
             inputs_batch,
             self.emotion_label:
             outputs_batch,
             self.gaussian_in:
             np.random.normal(size=(len(inputs_batch), self.style_dim))
         }
         self.train_on_batch(tf_ops, feed, sess)
     dev_loss_sum = 0
     for (loss_fn, loss_name) in loss_fns:
         train_loss = self.eval_batches(loss_fn, sess, train_examples,
                                        self.n_eval_batches)
         print("Train {} loss: {:.6f}".format(loss_name, train_loss))
         dev_loss = self.eval_batches(loss_fn, sess, dev_set,
                                      self.n_eval_batches)
         print("Dev {} loss: {:.6f}".format(loss_name, dev_loss))
         dev_loss_sum += dev_loss
     print("")
     return dev_loss_sum
Example #6
 def run_epoch(self, sess, train, labels, dev=None, dev_labels=None):
     prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
     for i, (enc_batch, dec_batch) in enumerate(
             minibatches(train, labels, self.config.batch_size)):
         loss = self.train_on_batch(sess, enc_batch, dec_batch)
         prog.update(i + 1, [("train loss", loss)])
     logger.info("Finished Epoch! Running Dev tests")
      if dev is not None and dev_labels is not None:
         return self.dev_loss(sess, dev, dev_labels)
     return loss
Example #7
 def get_gaussians(self, sess, dev_set, num_samples, output_path):
     for i, (inputs_batch,
             outputs_batch) in enumerate(minibatches(dev_set, num_samples)):
         feed = {
             self.image_in: inputs_batch,
             self.emotion_label: outputs_batch
         }
         style = np.array(sess.run(self.gen_styles, feed_dict=feed))
         break
     save_path = os.path.join(output_path, "gaussians.pkl")
     pickle.dump(style, open(save_path, "wb"))
Example #8
    def output(self, sess, inputs_raw, inputs=None):
        if inputs is None:
            inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))

        preds = []
        prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
        for i, batch in enumerate(minibatches(inputs, self.config.batch_size, shuffle=False)):
            batch = batch[:1] + batch[2:]
            preds_ = self.predict_on_batch(sess, *batch)
            preds += list(preds_)
            prog.update(i + 1, [])
        return self.consolidate_predictions(inputs_raw, inputs, preds)
Example #9
 def predict_on_batch(self, session, dataset):
     batch_num = int(np.ceil(len(dataset) * 1.0 / self.config.batch_size))
     # prog = Progbar(target=batch_num)
     predicts = []
     for i, batch in tqdm(
             enumerate(
                 minibatches(dataset, self.config.batch_size,
                             shuffle=False))):
         pred = self.answer(session, batch)
         # prog.update(i + 1)
         predicts.extend(pred)
     return predicts
Example #10
    def run_epoch(self, sess, train, labels):
        prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
        losses, grad_norms = [], []
        for i, (enc_batch, dec_batch) in enumerate(
                minibatches(train, labels, self.config.batch_size)):
            #print batch
            loss = self.train_on_batch(sess, enc_batch, dec_batch)
            #losses.append(loss)
            #grad_norms.append(grad_norm)
            #loss = 1
            prog.update(i + 1, [("train loss", loss)])

        return loss
Example #11
 def dev_loss(self, sess, dev_processed, labels_processed):
     prog = Progbar(target=1 +
                    int(len(dev_processed) / self.config.batch_size))
     tot_loss = 0
     for i, (enc_batch, dec_batch) in enumerate(
             minibatches(dev_processed,
                         labels_processed,
                         self.config.batch_size,
                         shuffle=False)):
         feed = self.create_feed_dict(enc_batch, labels_batch=dec_batch)
         _, loss = sess.run([self.test_op, self.predict_loss],
                            feed_dict=feed)
         tot_loss += loss
         prog.update(i + 1, [("dev loss", tot_loss)])
     return tot_loss
Example #12
 def get_reconstructions(self, sess, dev_set, num_samples, output_path):
     for i, (inputs_batch,
             outputs_batch) in enumerate(minibatches(dev_set, num_samples)):
         feed = {
             self.image_in: inputs_batch,
             self.emotion_label: outputs_batch
         }
         outputs = np.array(
             sess.run(self.gen_images_autoencode, feed_dict=feed))
         break
     for i in range(len(outputs)):
         imsave(os.path.join(output_path, "{}.png".format(i)),
                np.squeeze(inputs_batch[i]))
         imsave(os.path.join(output_path, "{}_recon.png".format(i)),
                np.squeeze(outputs[i]))
Example #13
 def get_reconstructions(self, sess, dev_set, num_samples, output_path):
     for i, (inputs_batch, outputs_batch) in enumerate(
             minibatches(dev_set, num_samples, shuffle=False)):
         feed = {
             self.image_in: inputs_batch,
             self.emotion_label: outputs_batch
         }
         outputs = np.array(
             sess.run(self.gen_images_autoencode, feed_dict=feed))
         break
     if not os.path.exists(output_path):
         os.makedirs(output_path)
     for i in range(len(outputs)):
         im = toimage(np.squeeze(inputs_batch[i]), cmin=0, cmax=1)
         im.save(os.path.join(output_path, "{}.png".format(i)))
         im = toimage(np.squeeze(outputs[i]), cmin=0, cmax=1)
         im.save(os.path.join(output_path, "{}_recon.png".format(i)))
Example #14
    def output(self, sess, inputs_raw, inputs):
        """
        Reports the output of the model on examples (uses helper to featurize each example).
        """
        # always require valid inputs arg
        # if inputs is None:
        #     inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))

        preds = []
        prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
        for i, batch in enumerate(
                minibatches(inputs, self.config.batch_size, shuffle=False)):
            # Ignore predict
            batch = batch[:2] + batch[3:]
            preds_ = self.predict_on_batch(sess, *batch)
            preds += list(preds_)
            prog.update(i + 1, [])
        return self.consolidate_predictions(inputs_raw, inputs, preds)
Example #15
    def eval_batches(self, sess, eval_set, num_batches):
        """Evaluate the loss on a number of given minibatches of a dataset.

        Args:
            sess: tf.Session()
            eval_set: full dataset, as passed to run_epoch
            num_batches: number of batches to evaluate
        Returns:
            loss: mean loss over the evaluated batches (a scalar)
        """
        losses = []
        for i, (inputs_batch, outputs_batch) in enumerate(
                minibatches(eval_set, self.config.batch_size)):
            if i >= num_batches:
                break
            loss = self.eval_on_batch(sess, inputs_batch, outputs_batch)
            losses.append(loss)
        return np.mean(losses)
Example #16
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw,
                  dev_set_raw):
        prog = Progbar(target=1 +
                       int(len(train_examples) / self.config.batch_size))
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            if self.report: self.report.log_train_loss(loss)
        print("")

        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)

        f1 = entity_scores[-1]
        return f1
Example #17
    def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw, epoch):
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        curr_loss = 0.
        num_encountered = 0
        for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            curr_loss += loss
            num_encountered += 1
            if self.report: self.report.log_train_loss(loss)
        train_loss.append(curr_loss/num_encountered)
        
        print("")

        logger.info("Evaluating on development data")
        divergence = self.evaluate(sess, dev_set, dev_set_raw)
        logger.info("KL divergence: %.2f", divergence)

        dev_loss.append(divergence)
        return divergence
Example #18
    def evaluate(self, sess, examples, examples_raw):
        global pred_latest
        global gold_latest 
        global example_latest
        avg_div = 0.0
        seen = 0
        
        for i, batch in enumerate(minibatches(examples, self.config.batch_size, shuffle=False)):
            pred_label = self.predict_on_batch(sess, batch[0])
            gold_label = batch[1]
            divergence = np_kl_divergence(gold_label, pred_label)
            
            pred_latest = pred_label[:40]
            gold_latest = gold_label[:40]
            example_latest = [example[0] for example in examples_raw[:40]]

            avg_div += divergence
            seen += 1

        return avg_div/seen
Example #19
 def run_epoch(self, sess, train_examples, dev_set, logfile=None):
      prog = Progbar(
          target=1 + int(train_examples[0].shape[0] / self.config.batch_size))
     for i, (inputs_batch, outputs_batch) in enumerate(
             minibatches(train_examples, self.config.batch_size)):
         loss = self.train_on_batch(sess,
                                    inputs_batch,
                                    outputs_batch,
                                    get_loss=True)
         prog.update(i + 1, [("train loss", loss)])
     print("")
     print("Evaluating on train set...")
     train_loss = self.eval_batches(sess, train_examples,
                                    self.config.n_eval_batches)
     print("Train Loss: {0:.6f}".format(train_loss))
     print("Evaluating on dev set...")
     dev_loss = self.eval_batches(sess, dev_set, self.config.n_eval_batches)
     print("Dev Loss: {0:.6f}".format(dev_loss))
     logfile.write(",{0:.5f},{1:.5f}\n".format(float(train_loss),
                                               float(dev_loss)))
     return dev_loss
Example #20
    def run_epoch(self,
                  session,
                  epoch_num,
                  training_set,
                  vocab,
                  validation_set,
                  sample_size=400):
        set_num = len(training_set)
        batch_size = self.config.batch_size
        batch_num = int(np.ceil(set_num * 1.0 / batch_size))

        prog = Progbar(target=batch_num)
        avg_loss = 0
        for i, batch in enumerate(
                minibatches(training_set,
                            self.config.batch_size,
                            shuffle=False)):
            global_batch_num = batch_num * epoch_num + i
            _, summary, loss = self.optimize(session, batch)
            prog.update(i + 1, [("training loss", loss)])
            if self.config.tensorboard and global_batch_num % self.config.log_batch_num == 0:
                self.train_writer.add_summary(summary, global_batch_num)
            if (i + 1) % self.config.log_batch_num == 0:
                logging.info('')
                self.evaluate_answer(session,
                                     training_set,
                                     vocab,
                                     sample=sample_size,
                                     log=True)
                self.evaluate_answer(session,
                                     validation_set,
                                     vocab,
                                     sample=sample_size,
                                     log=True)
            avg_loss += loss
        avg_loss /= batch_num
        logging.info("Average training loss: {}".format(avg_loss))
        return avg_loss
Example #21
    def predict(self, sess, dev, labels):
        predicted_sentences = []

        dev_processed, labels_processed = self.preprocess_sequence_data(
            dev, labels)
        print(self.config.batch_size)
        for i, (enc_batch, dec_batch) in enumerate(
                minibatches(dev_processed,
                            labels_processed,
                            self.config.batch_size,
                            shuffle=False)):
            print(enc_batch.shape, dec_batch.shape)
            feed = self.create_feed_dict(enc_batch, dec_batch)

            outputs = sess.run(self.pred_proj, feed_dict=feed)
            for j in range(outputs.shape[0]):
                predicted = " ".join(
                    [self.labels_vocab.id2tok[k] for k in outputs[j, :]])
                print(predicted)
                predicted_sentences.append(predicted)

        return predicted_sentences
Example #22
    def predict(self, sess, dev, labels):
        predicted_sentences = []

        dev_processed, labels_processed = self.preprocess_sequence_data(
            dev, labels)
        prog = Progbar(target=1 +
                       int(len(dev_processed) / self.config.batch_size))
        # predict in batches
        for i, (enc_batch, dec_batch) in enumerate(
                minibatches(dev_processed,
                            labels_processed,
                            self.config.batch_size,
                            shuffle=False)):
            #print enc_batch.shape, dec_batch.shape
            feed = self.create_feed_dict(enc_batch, dec_batch)

            outputs = sess.run(self.test_op, feed_dict=feed)
            for j in range(outputs.shape[0]):
                predicted = self.construct_sentence(outputs[j, :])
                #print predicted
                predicted_sentences.append(predicted)
            prog.update(i + 1)

        return predicted_sentences
Example #23
    def validate(self, sess, valid_dataset):
        """
        Iterate through the validation dataset and determine what
        the validation cost is.

        This method calls self.test() which explicitly calculates validation cost.

        How you implement this function depends on how you design
        your data iteration function.

        :return:
        """
        batch_num = int(
            np.ceil(len(valid_dataset) * 1.0 / self.config.batch_size))
        prog = Progbar(target=batch_num)
        avg_loss = 0
        for i, batch in enumerate(
                minibatches(valid_dataset, self.config.batch_size)):
            loss = self.test(sess, batch)[0]
            prog.update(i + 1, [("validation loss", loss)])
            avg_loss += loss
        avg_loss /= batch_num
        logging.info("Average validation loss: {}".format(avg_loss))
        return avg_loss