def dump(**kwargs) -> None:
    """Print a per-token prediction analysis of one file to stdout.

    For every token (past the first PREFIX_LENGTH), prints the local
    context, the top-5 tokens ranked by the forwards/backwards consensus,
    and the actual token when it missed the top-5.  Afterwards prints
    summary statistics (MRR, lowest rank, fraction of time the actual
    token ranked #1) and details for the 5 positions where the two
    models agreed least.

    kwargs are forwarded verbatim to common_args (defined elsewhere);
    the returned object supplies the token stream, file vector, and the
    two trained models.
    """
    common = common_args(**kwargs)
    t = Terminal()
    # NOTE(review): in-string spacing in these templates may have been
    # collapsed by whitespace mangling — confirm against upstream.
    # Both templates are filled with format_map(locals()), so the local
    # variable names below (prob, color, text, actual_text, t) are part
    # of the output contract and must not be renamed.
    ranking_line = " {prob:6.2f}% → {color}{text}{t.normal}"
    actual_line = "{t.red}Actual{t.normal}: {t.bold}{actual_text}{t.normal}"
    # Sliding sentence windows over the same vector, one per direction.
    sent_forwards = Sentences(common.file_vector,
                              size=SENTENCE_LENGTH,
                              backwards=False)
    sent_backwards = Sentences(common.file_vector,
                               size=SENTENCE_LENGTH,
                               backwards=True)
    least_agreement = []        # Agreement(min_prob, index) per position
    forwards_predictions = []   # argmax token id per position (forwards model)
    backwards_predictions = []  # argmax token id per position (backwards model)
    ranks = []                  # 1-based rank of the actual token per position
    # NOTE(review): chop_prefix is called with an explicit length for the
    # token stream but with no length for sent_backwards — presumably it
    # defaults to PREFIX_LENGTH; verify against its definition.
    contexts = enumerate(zip(chop_prefix(common.tokens, PREFIX_LENGTH),
                             sent_forwards,
                             chop_prefix(sent_backwards)))
    # Note, the index is offset from the true start; i.e., when
    # index == 0, the true index is SENTENCE_LENGTH
    for index, (token, (prefix, x1), (suffix, x2)) in contexts:
        # Both windows must agree on the token they predict.
        assert x1 == x2
        actual = x1
        # Show the immediate context around the token under analysis.
        print(unvocabularize(prefix[-5:]),
              t.bold_underline(token.value),
              unvocabularize(suffix[:5]))
        prefix_pred = common.forwards_model.predict(prefix)
        suffix_pred = common.backwards_model.predict(suffix)
        # harmonic mean
        mean = consensus(suffix_pred, prefix_pred)
        forwards_predictions.append(index_of_max(prefix_pred))
        backwards_predictions.append(index_of_max(suffix_pred))
        # paired_rankings: (token_id, weight) pairs, best first.
        paired_rankings = rank(mean)
        ranked_vocab = list(tuple(zip(*paired_rankings))[0])
        top_5 = paired_rankings[:5]
        top_5_words = ranked_vocab[:5]
        for token_id, weight in top_5:
            # Highlight the entry when it is the actual token.
            color = t.green if token_id == actual else ''
            text = vocabulary.to_text(token_id)
            prob = weight * 100.0
            print(ranking_line.format_map(locals()))
        # Rank is 1-based: position of the actual token in the ranking.
        ranks.append(ranked_vocab.index(actual) + 1)
        # NOTE(review): min_token_id is unpacked but never used below.
        min_token_id, min_prob = paired_rankings[0]
        least_agreement.append(Agreement(min_prob, index))
        if actual not in top_5_words:
            # The consensus missed: show what the token actually was.
            actual_text = vocabulary.to_text(actual)
            print(actual_line.format_map(locals()))
        print()
    if not ranks:
        # Nothing was analyzed (e.g., the file was too short).
        print(t.red("Could not analyze file!"), file=sys.stderr)
        return
    # Summary statistics over the whole file.
    print("MRR: ", mean_reciprocal_rank(ranks))
    print("Lowest rank:", max(ranks))
    print("Time at #1: {:.2f}%".format(
        100 * sum(1 for rank in ranks if rank == 1) / len(ranks)))
    print()
    forwards_text = [vocabulary.to_text(num) for num in forwards_predictions]
    backwards_text = [vocabulary.to_text(num) for num in backwards_predictions]
    # Agreement sorts by probability, so the least-confident positions
    # come first — presumably via Agreement's ordering; confirm.
    least_agreement.sort()
    # Compensate for offset indices
    # NOTE(review): file_vector is assigned but never used afterwards.
    file_vector = common.file_vector[SENTENCE_LENGTH:]
    tokens_text = [tok.value for tok in common.tokens[PREFIX_LENGTH:]]
    for disagreement in least_agreement[:5]:
        print(disagreement.probability)
        prefix = ' '.join(disagreement.prefix(tokens_text))
        suffix = ' '.join(disagreement.suffix(tokens_text))
        # NOTE(review): `list @ disagreement` must dispatch to
        # Agreement.__rmatmul__ (lists define no __matmul__) — it
        # presumably selects the element at the disagreement's index;
        # verify against Agreement's definition.
        print("   ", prefix, t.yellow(forwards_text @ disagreement), suffix)
        print("   ", prefix, t.underline(tokens_text @ disagreement), suffix)
        print("   ", prefix, t.blue(backwards_text @ disagreement), suffix)
        print()