def evaluate(self, dataset, eval_dev=True):
    if eval_dev:
        self.logger.info("Testing model over DEVELOPMENT dataset")
    else:
        self.logger.info("Testing model over TEST dataset")
    actuals = []
    predicts = []
    seq_lengths = []
    for words, labels in batch_iter(dataset, self.cfg.batch_size):
        labels_pred, sequence_lengths = self.predict(words)
        actuals.append(labels)
        predicts.append(labels_pred)
        seq_lengths.append(sequence_lengths)
    eval_score = compute_accuracy_f1(actuals, predicts, seq_lengths,
                                     self.cfg.train_task, self.cfg.tag_vocab)
    self.logger.info("accuracy: {:04.2f} -- f1 score: {:04.2f}".format(
        eval_score['acc'], eval_score['f1']))
    return eval_score
def evaluate(self, dataset, eval_dev=True):
    actuals = []
    predicts = []
    seq_lengths = []
    for words, labels in batch_iter(dataset, self.cfg.batch_size):
        labels_pred, sequence_lengths = self.predict(words)
        actuals.append(labels)
        predicts.append(labels_pred)
        seq_lengths.append(sequence_lengths)
    eval_score = compute_accuracy_f1(actuals, predicts, seq_lengths,
                                     self.cfg.train_task, self.cfg.tag_vocab)
    self.logger.info(
        "Testing model over {} dataset: accuracy - {:04.2f}, f1 score - {:04.2f}"
        .format('DEVELOPMENT' if eval_dev else 'TEST',
                eval_score['acc'], eval_score['f1']))
    return eval_score
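# For context: batch_iter and compute_accuracy_f1 are project utilities whose
# implementations are not shown here. A minimal sketch of batch_iter consistent
# with how it is called above (yielding (words, labels) batches of at most
# batch_size sentence pairs) might look like the following; this is an
# illustrative assumption, not the project's actual implementation.
def batch_iter(dataset, batch_size):
    words_batch, labels_batch = [], []
    # dataset is assumed to be an iterable of (words, labels) sentence pairs
    for words, labels in dataset:
        words_batch.append(words)
        labels_batch.append(labels)
        if len(words_batch) == batch_size:
            yield words_batch, labels_batch
            words_batch, labels_batch = [], []
    if words_batch:  # emit the final, possibly smaller, batch
        yield words_batch, labels_batch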