def __init__(self, config, max_vocab_size=10000):
    self.config = config
    self.max_utt_len = config.max_utt_len
    self.black_domains = config.black_domains
    self.black_ratio = config.black_ratio
    self.include_domain = config.include_domain
    self.include_example = config.include_example
    self.include_state = config.include_state
    self.tokenize = get_tokenize()
    train_data, train_meta = self._read_file(self.config.train_dir,
                                             is_train=True)
    test_data, test_meta = self._read_file(self.config.test_dir,
                                           is_train=False)
    # combine train and test domain meta
    train_meta.update(test_meta)
    self.domain_meta = self._process_meta(train_meta)
    self.corpus = self._process_dialog(train_data)
    self.test_corpus = self._process_dialog(test_data)
    self.logger.info("Loaded corpus with %d train and %d test dialogs"
                     % (len(self.corpus), len(self.test_corpus)))
    # build up a vocabulary
    self.vocab, self.rev_vocab = self._build_vocab(max_vocab_size)
    self.logger.info("Done loading corpus")
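# _build_vocab is defined elsewhere in the repo; as a rough sketch, a
# frequency-capped vocabulary of the kind the constructor expects could be
# built as below. The special-token set and the Counter-based layout are
# assumptions for illustration, not the repo's actual implementation.
from collections import Counter

def build_vocab_sketch(tokenized_utts, max_vocab_size,
                       specials=('<pad>', '<unk>', '<s>', '</s>')):
    # count every token across all tokenized utterances
    counts = Counter(tok for utt in tokenized_utts for tok in utt)
    # keep the most frequent tokens, reserving slots for special symbols
    keep = max_vocab_size - len(specials)
    vocab = list(specials) + [t for t, _ in counts.most_common(keep)]
    rev_vocab = {t: i for i, t in enumerate(vocab)}
    return vocab, rev_vocab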
def __init__(self, config):
    self.config = config
    self._path = config.data_dir[0]
    self.max_utt_len = config.max_utt_len
    self.tokenize = get_tokenize()
    self.black_domains = config.black_domains
    self.black_ratio = config.black_ratio
    self.train_corpus = self._read_file(
        os.path.join(self._path, 'kvret_train_public.json'))
    self.valid_corpus = self._read_file(
        os.path.join(self._path, 'kvret_dev_public.json'))
    self.test_corpus = self._read_file(
        os.path.join(self._path, 'kvret_test_public.json'))
    with open(os.path.join(self._path, 'kvret_entities.json'), 'rb') as f:
        self.ent_metas = json.load(f)
    self.domain_descriptions = self._read_domain_descriptions(self._path)
    self._build_vocab()
    print("Done loading corpus")
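# A hedged usage sketch for the KVRET loader above. The class name
# KvretCorpus and the use of argparse.Namespace are assumptions for
# illustration; only the config fields actually read by __init__ are set.
from argparse import Namespace

def load_kvret_sketch():
    config = Namespace(
        data_dir=['data/kvret'],  # folder containing the kvret_*_public.json files
        max_utt_len=40,           # utterance length cap used by the loader
        black_domains=[],         # domains to hold out, if any
        black_ratio=0.0)          # ratio applied to held-out domains (assumed semantics)
    return KvretCorpus(config)    # hypothetical class name for the loader above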
def get_report(self, include_error=False):
    reports = []
    tokenize = get_tokenize()
    for domain, labels in self.domain_labels.items():
        predictions = self.domain_hyps[domain]
        self.logger.info("Generating report for {} on {} samples".format(
            domain, len(predictions)))
        refs, hyps = [], []
        # entity precision, recall and f1
        tp, fp, fn = 0.0, 0.0, 0.0
        for label, hyp in zip(labels, predictions):
            label = label.replace(EOS, '').replace(BOS, '')
            hyp = hyp.replace(EOS, '').replace(BOS, '')
            # skip the first two tokens (likely a speaker prefix)
            ref_tokens = tokenize(label)[2:]
            hyp_tokens = tokenize(hyp)[2:]
            refs.append([ref_tokens])
            hyps.append(hyp_tokens)
            label_ents = self.pred_ents(label, tokenize, None)
            hyp_ents = self.pred_ents(hyp, tokenize, None)
            # hyp_ents = list(set(hyp_ents))
            ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ents, hyp_ents)
            tp += ttpp
            fp += ffpp
            fn += ffnn

        ent_precision, ent_recall, ent_f1 = self._get_prec_recall(tp, fp, fn)
        # compute corpus level BLEU
        bleu = bleu_score.corpus_bleu(
            refs, hyps, smoothing_function=SmoothingFunction().method1)
        report = "\nDomain: %s BLEU %f\n Entity precision %f recall %f and f1 %f\n" \
                 % (domain, bleu, ent_precision, ent_recall, ent_f1)
        reports.append(report)

    return "\n==== REPORT===={report}".format(
        report="========".join(reports))
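# The two helpers used above (_get_tp_fp_fn, _get_prec_recall) are defined
# elsewhere in the repo; the sketch below is a hedged guess at their logic,
# assuming membership-based matching of entity lists, for readers following along.
def _get_tp_fp_fn_sketch(label_ents, pred_ents):
    # true positives: predicted entities that appear in the reference
    tp = sum(1 for e in pred_ents if e in label_ents)
    # false positives: predicted entities missing from the reference
    fp = sum(1 for e in pred_ents if e not in label_ents)
    # false negatives: reference entities the prediction missed
    fn = sum(1 for e in label_ents if e not in pred_ents)
    return tp, fp, fn

def _get_prec_recall_sketch(tp, fp, fn):
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return precision, recall, f1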
def get_report(self, include_error=False):
    reports = []
    errors = []
    for domain, labels in self.domain_labels.items():
        intent2refs = defaultdict(list)
        intent2hyps = defaultdict(list)
        predictions = self.domain_hyps[domain]
        self.logger.info("Generating report for {} on {} samples".format(
            domain, len(predictions)))
        # entity precision, recall and f1
        tp, fp, fn = 0.0, 0.0, 0.0
        # intent precision, recall and f1
        itp, ifp, ifn = 0.0, 0.0, 0.0
        # backend (KB search) accuracy
        btp, bfp, bfn = 0.0, 0.0, 0.0
        # BLEU score
        refs, hyps = [], []
        pred_intents = self.pred_acts(predictions)
        label_intents = self.pred_acts(labels)
        tokenize = get_tokenize()
        bad_predictions = []

        for label, hyp, label_ints, pred_ints in zip(
                labels, predictions, label_intents, pred_intents):
            refs.append([label.split()])
            hyps.append(hyp.split())
            label_ents = self.pred_ents(label, tokenize, domain)
            pred_ents = self.pred_ents(hyp, tokenize, domain)
            for intent in label_ints:
                intent2refs[intent].append([label.split()])
                intent2hyps[intent].append(hyp.split())
            # update the intent counts
            ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ints, pred_ints)
            itp += ttpp
            ifp += ffpp
            ifn += ffnn
            # entity or KB search counts
            ttpp, ffpp, ffnn = self._get_tp_fp_fn(label_ents, pred_ents)
            if ffpp > 0 or ffnn > 0:
                bad_predictions.append((label, hyp))
            if "query" in label_ints:
                btp += ttpp
                bfp += ffpp
                bfn += ffnn
            else:
                tp += ttpp
                fp += ffpp
                fn += ffnn

        # compute corpus level scores
        bleu = bleu_score.corpus_bleu(
            refs, hyps, smoothing_function=SmoothingFunction().method1)
        ent_precision, ent_recall, ent_f1 = self._get_prec_recall(tp, fp, fn)
        int_precision, int_recall, int_f1 = self._get_prec_recall(itp, ifp, ifn)
        back_precision, back_recall, back_f1 = self._get_prec_recall(btp, bfp, bfn)

        # compute BLEU w.r.t. each intent
        intent_report = []
        for intent in intent2refs.keys():
            i_bleu = bleu_score.corpus_bleu(
                intent2refs[intent], intent2hyps[intent],
                smoothing_function=SmoothingFunction().method1)
            intent_report.append("{}: {}".format(intent, i_bleu))
        intent_report = "\n".join(intent_report)

        # collect bad cases
        error = ''
        if include_error:
            error = '\nDomain {} errors\n'.format(domain)
            error += "\n".join(['True: {} ||| Pred: {}'.format(r, h)
                                for r, h in bad_predictions])

        report = "\nDomain: %s\n" \
                 "Entity precision %f recall %f and f1 %f\n" \
                 "Intent precision %f recall %f and f1 %f\n" \
                 "KB precision %f recall %f and f1 %f\n" \
                 "BLEU %f BEAK %f\n\n%s\n" \
                 % (domain, ent_precision, ent_recall, ent_f1,
                    int_precision, int_recall, int_f1,
                    back_precision, back_recall, back_f1,
                    bleu, gmean([ent_f1, int_f1, back_f1, bleu]),
                    intent_report)
        reports.append(report)
        errors.append(error)

    if include_error:
        return "\n==== REPORT===={error}\n========\n {report}".format(
            error="========".join(errors),
            report="========".join(reports))
    else:
        return "\n==== REPORT===={report}".format(
            report="========".join(reports))
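# The BEAK value printed above is the geometric mean of entity F1, intent F1,
# KB F1 and BLEU, computed with scipy.stats.gmean. A quick worked check with
# made-up scores:
from scipy.stats import gmean

scores = [0.62, 0.55, 0.48, 0.20]  # ent_f1, int_f1, back_f1, bleu (illustrative)
beak = gmean(scores)               # (0.62 * 0.55 * 0.48 * 0.20) ** 0.25
print(round(beak, 4))              # ~0.4254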