def evaluate(self, batches, mode, output_path=None):
    """Run category prediction over `batches` and compute accuracy.

    Args:
      batches: iterable of batch objects accepted by `get_input_feed`.
      mode: label (e.g. 'valid'/'test') used in the summary tag.
      output_path: optional path; when given, the per-example evaluation
        printout is written there instead of stdout.

    Returns:
      (accuracy, summary): the accuracy from `evaluate_and_print` and a
      TF summary built from it.
    """
    results = []
    used_batches = []
    for batch in batches:
        input_feed = self.get_input_feed(batch, False)
        outputs = self.sess.run(self.predictions, input_feed)
        try:
            used_batches += flatten_batch(batch)
        except Exception as e:
            # Debugging aid: dump the offending batch before aborting.
            pprint(batch)
            print(e)
            exit(1)
        results.append(outputs)
    results = np.concatenate(results, axis=0)

    # Redirect stdout only for the duration of the printout; the original
    # leaked the file handle and left stdout redirected on exception.
    out_file = open(output_path, 'w') if output_path else None
    if out_file:
        sys.stdout = out_file
    try:
        accuracy = evaluate_and_print(used_batches, results,
                                      vocab=self.encoder.vocab)
    finally:
        sys.stdout = sys.__stdout__
        if out_file:
            out_file.close()
    if output_path:
        sys.stderr.write(
            "Output the testing results to \'{}\' .\n".format(output_path))

    summary_dict = {}
    summary_dict['category/%s/Accuracy' % mode] = accuracy
    summary = make_summary(summary_dict)
    return accuracy, summary
def evaluate(self, batches, mode, output_path=None):
    """Run inference over `batches`, score predictions, and build a summary.

    Args:
      batches: iterable of batch objects accepted by `get_input_feed`.
      mode: label (e.g. 'valid'/'test') used in the summary tags.
      output_path: optional path; when given, the evaluation printout is
        written there instead of stdout.

    Returns:
      ((acc, prec, recall), summary): scalar metrics plus a TF summary.
    """
    results = []
    used_batches = []
    for batch in batches:
        input_feed = self.get_input_feed(batch, False)
        # The original also ran self.loss here but discarded the value;
        # that extra sess.run was dropped as wasted work.
        outputs = self.sess.run(self.outputs, input_feed)
        used_batches += flatten_batch(batch)
        results.append(outputs)
    results = np.concatenate(results, axis=0)

    # Redirect stdout only while printing; close the file and restore
    # stdout even if scoring raises (the original leaked both).
    out_file = open(output_path, 'w') if output_path else None
    if out_file:
        sys.stdout = out_file
    try:
        acc, prec, recall = evaluate_and_print(used_batches, results,
                                               vocab=self.encoder.vocab)
        print('acc, p, r, f = %.2f %.2f %.2f %.2f' % (
            100.0 * acc, 100.0 * prec, 100.0 * recall,
            100.0 * (prec + recall) / 2))
    finally:
        sys.stdout = sys.__stdout__
        if out_file:
            out_file.close()

    # NOTE(review): '(prec + recall) / 2' is the arithmetic mean, not the
    # harmonic-mean F1 (2pr/(p+r)); kept as-is to preserve reported numbers.
    summary_dict = {}
    summary_dict['graph/%s/Accuracy' % mode] = acc
    summary_dict['graph/%s/Precision' % mode] = prec
    summary_dict['graph/%s/Recall' % mode] = recall
    summary_dict['graph/%s/F1' % mode] = (prec + recall) / 2
    summary = make_summary(summary_dict)
    return (acc, prec, recall), summary
def demo(self, roles, picks, bans):
    """Build a test batch from raw inputs, predict, and print each example.

    Args:
      roles, picks, bans: raw inputs forwarded to `create_test_batch`.

    Returns:
      (batch, predictions): the constructed batch and the model outputs.
    """
    batch = create_test_batch(roles, picks, bans, self.vocab)
    feed = self.get_input_feed(batch, False)
    predictions = self.sess.run(self.predictions, feed)
    for example, pred in zip(flatten_batch(batch), predictions):
        print_example(example, prediction=pred)
        print('')
    return batch, predictions
def debug(self):
    """Dump the first training batch of the first task, then exit.

    Inspection helper: prints the shape (or type) of every field in the
    batch, then pretty-prints each flattened example, and terminates the
    process. Only the first batch is ever inspected.
    """
    task_name = list(self.config.tasks.keys())[0]
    mode = 'train'
    batches = self.get_batch(mode)[task_name]
    for i, batch in enumerate(batches):
        for k, v in flatten_recdict(batch).items():
            if isinstance(v, np.ndarray):
                print(k, v.shape)
            else:
                print(k, type(v))
        for j, ex in enumerate(flatten_batch(batch)):
            print('<%03d-%03d>' % (i, j))
            print_example(ex, self.vocab)
            print('')
        # Deliberate: stop after the first batch. The original had a
        # second exit(1) after the loop, which was unreachable.
        exit(1)
def test(self, batches, mode, logger, output_path=None):
    """Predict relations/mentions over `batches` and compute P/R/F1.

    Args:
      batches: iterable of batch objects accepted by `get_input_feed`.
      mode: label used in the summary tags.
      logger: unused here; kept for interface compatibility.
      output_path: optional path; when given, the formatted results are
        written there instead of stdout.

    Returns:
      (triple_f1, summary): triple-level F1 plus a TF summary carrying
      both triple- and mention-level precision/recall/F1.
    """
    results = []
    used_batches = []
    for batch in batches:
        input_feed = self.get_input_feed(batch, False)
        relations, mentions = self.sess.run(self.predictions, input_feed)
        try:
            used_batches += flatten_batch(batch)
        except Exception as e:
            # Debugging aid: dump the offending batch before aborting.
            pprint(batch)
            print(e)
            exit(1)
        # One (relation, mention) pair per example.
        results.extend(zip(relations.tolist(), mentions.tolist()))

    # Redirect stdout only while printing; close the file and restore
    # stdout even on exception (the original leaked both).
    out_file = open(output_path, 'w') if output_path else None
    if out_file:
        sys.stdout = out_file
    try:
        triples, mentions = dataset_class.formatize_and_print(
            used_batches, results, vocab=self.encoder.vocab)
        triple_precision, triple_recall, triple_f1 = \
            dataset_class.evaluate_triples(triples)
        mention_precision, mention_recall, mention_f1 = \
            dataset_class.evaluate_mentions(mentions)
    finally:
        sys.stdout = sys.__stdout__
        if out_file:
            out_file.close()
    if output_path:
        sys.stderr.write(
            "Output the testing results to \'{}\' .\n".format(output_path))

    summary_dict = {}
    summary_dict['relex/%s/triple/f1' % mode] = triple_f1
    summary_dict['relex/%s/triple/precision' % mode] = triple_precision
    summary_dict['relex/%s/triple/recall' % mode] = triple_recall
    summary_dict['relex/%s/mention/f1' % mode] = mention_f1
    summary_dict['relex/%s/mention/precision' % mode] = mention_precision
    summary_dict['relex/%s/mention/recall' % mode] = mention_recall
    summary = make_summary(summary_dict)
    return triple_f1, summary
def test(self, batches, mode, logger, output_path):
    """Decode `batches`, evaluate accuracy, and build a summary.

    Args:
      batches: iterable of batch objects accepted by `get_input_feed`.
      mode: label used in the stderr progress line and summary tag.
      logger: unused here; kept for interface compatibility.
      output_path: path for the evaluation printout; falsy means stdout.

    Returns:
      (acc, summary): accuracy plus a TF summary.
    """
    used_batches = []
    chunks = []
    sys.stderr.write('Start decoding (%s) ...\n' % mode)
    for batch in batches:
        input_feed = self.get_input_feed(batch, False)
        outputs = self.sess.run(self.predictions, input_feed)
        used_batches += flatten_batch(batch)
        chunks.append(outputs)
    # Concatenate once at the end; the original re-concatenated inside the
    # loop, which is accidentally O(n^2). The empty (0, 2) seed preserves
    # the original's behavior when `batches` is empty.
    results = np.concatenate([np.zeros([0, 2])] + chunks)

    # Redirect stdout only while printing; close the file and restore
    # stdout even on exception (the original never closed the file).
    out_file = open(output_path, 'w') if output_path else None
    if out_file:
        sys.stdout = out_file
    try:
        sys.stderr.write('%d %d\n' % (len(results), len(used_batches)))
        acc = evaluate_and_print(used_batches, results)
    finally:
        sys.stdout = sys.__stdout__
        if out_file:
            out_file.close()

    summary_dict = {}
    summary_dict['%s/%s/accuracy' % (self.scopename, mode)] = acc
    summary = make_summary(summary_dict)
    return acc, summary
def test(self, batches, mode, logger, output_path):
    """Generate descriptions for `batches` and score them with BLEU.

    Args:
      batches: iterable of batch objects accepted by `get_input_feed`.
      mode: label used in the summary tag.
      logger: unused here; kept for interface compatibility.
      output_path: path for the evaluation printout; falsy means stdout.

    Returns:
      (bleu, summary): BLEU score plus a TF summary.
    """
    results = []
    used_batches = []
    for batch in batches:
        input_feed = self.get_input_feed(batch, False)
        outputs = self.sess.run(self.predictions, input_feed)
        try:
            used_batches += flatten_batch(batch)
        except Exception as e:
            # Debugging aid: dump the offending batch before aborting.
            pprint(batch)
            print(e)
            exit(1)
        # Keep only the top beam (index 0) for each example.
        results.append(outputs[:, 0, :])
    results = flatten([r.tolist() for r in results])

    # Redirect stdout only while printing; close the file and restore
    # stdout even on exception (the original leaked both).
    out_file = open(output_path, 'w') if output_path else None
    if out_file:
        sys.stdout = out_file
    try:
        bleu = evaluate_and_print(used_batches, results, vocab=self.vocab)
    finally:
        sys.stdout = sys.__stdout__
        if out_file:
            out_file.close()
    if output_path:
        sys.stderr.write(
            "Output the testing results to \'{}\' .\n".format(output_path))

    summary_dict = {}
    summary_dict['desc/%s/BLEU' % mode] = bleu
    summary = make_summary(summary_dict)
    return bleu, summary