def show(out_file_name, summarized_table: List[Entry]):
    """Write a per-token contribution report for every entry to an HTML file.

    Each element of ``summarized_table`` is unpacked as
    ``(input_ids, prob, contributions)``; ``contributions`` is queried by
    token index. Tokens with a contribution are coloured "R" (positive) or
    "B" (otherwise) with intensity ``abs(score) * 100``; tokens without one
    are rendered gray. An entry's token row is only printed when the largest
    absolute contribution is not below 0.05.

    :param out_file_name: passed to ``HtmlVisualizer``.
    :param summarized_table: iterable of 3-tuples as described above.
    """
    report = HtmlVisualizer(out_file_name)
    tokenizer = get_tokenizer()
    printed = 0

    for input_ids, prob, contributions in summarized_table:
        tokens = tokenizer.convert_ids_to_tokens(input_ids)
        report.write_paragraph("Score : {}".format(prob))

        row = []
        largest_abs = 0
        for pos in range(len(input_ids)):
            token = tokens[pos]
            if token == "[PAD]":
                # Padding marks the end of the real sequence.
                break
            if pos not in contributions:
                row.append(
                    Cell(token, highlight_score=150, target_color="Gray"))
                continue
            raw = contributions[pos]
            largest_abs = max(abs(raw), largest_abs)
            intensity = abs(raw) * 100
            tint = "R" if raw > 0 else "B"
            row.append(Cell(token, highlight_score=intensity,
                            target_color=tint))

        # Entries whose strongest contribution is negligible are not printed.
        if not largest_abs < 0.05:
            report.multirow_print(row, 30)
            printed += 1

    print("printed {} of {}".format(printed, len(summarized_table)))
def draw():
    """Render token-level gradients of the "pc_para_I_grad" run as HTML.

    For every prediction entry the tokens are highlighted by
    ``min(abs(grad) * 1e4, 255)`` and coloured "B" for positive gradients and
    "R" otherwise, followed by a "Label / Pred" paragraph. Output goes to
    ``<name>.html``.
    """
    # Other run names that were used with this script:
    # "pc_para_D_grad", "pc_para_H_grad".
    name = "pc_para_I_grad"
    data = EstimatorPredictionViewerGosford(name)
    html_writer = HtmlVisualizer(name + ".html", dark_mode=False)

    for entry in data:
        tokens = entry.get_tokens("input_ids")
        grad = entry.get_vector("gradient")
        cells = data.cells_from_tokens(tokens)
        for i, cell in enumerate(cells):
            # Scale tiny gradient magnitudes up and clamp to the 0-255 range.
            cell.highlight_score = min(abs(grad[i]) * 1e4, 255)
            cell.target_color = "B" if grad[i] > 0 else "R"
        print(grad)

        prob = softmax(entry.get_vector("logits"))
        pred = np.argmax(prob)
        label = entry.get_vector("labels")
        html_writer.write_paragraph(
            "Label={} / Pred={}".format(str(label), pred))
        html_writer.multirow_print(cells)
def show_tfrecord(file_path):
    """Dump a TFRecord file as HTML, highlighting alt-embedding positions.

    For each record the tokens and the ``alt_emb_mask`` feature are split into
    two segments with ``split_p_h_with_input_ids``; tokens whose mask value is
    truthy are highlighted with score 100. The report is written to
    ``<basename of file_path>.html``.
    """
    def masked_cells(segment_tokens, segment_mask):
        # One cell per token; highlighted when the mask at that index is set.
        return [
            Cell(tok, 100 if segment_mask[pos] else 0)
            for pos, tok in enumerate(segment_tokens)
        ]

    records = load_record_v2(file_path)
    tokenizer = get_tokenizer()
    html = HtmlVisualizer(os.path.basename(file_path) + ".html")

    for features in records:
        input_ids = take(features["input_ids"])
        alt_emb_mask = take(features["alt_emb_mask"])
        tokens = tokenizer.convert_ids_to_tokens(input_ids)

        p_tokens, h_tokens = split_p_h_with_input_ids(tokens, input_ids)
        p_mask, h_mask = split_p_h_with_input_ids(alt_emb_mask, input_ids)
        first_row = masked_cells(p_tokens, p_mask)
        second_row = masked_cells(h_tokens, h_mask)

        label = take(features["label_ids"])[0]
        html.write_paragraph("Label : {}".format(label))
        html.write_table([first_row])
        html.write_table([second_row])
def run():
    """Visualize explanation scores from the "contradiction_prediction" stream.

    Each streamed item is unpacked as ``(e, p)`` where ``e`` yields the input
    ids and ``p`` yields ``(logit, explain)``. Tokens and explanation scores
    are split into two segments, each segment's scores are normalized
    independently, and both rows are printed under the stringified logit.
    Processing stops after the item for which the counter already exceeds 100.
    """
    tokenizer = get_tokenizer()
    reader = StreamPickleReader("contradiction_prediction")
    html = HtmlVisualizer("contradiction_prediction.html")

    seen = 0
    while reader.has_next():
        example, prediction = reader.get_item()
        input_ids, _, _ = example
        logit, explain = prediction

        tokens = tokenizer.convert_ids_to_tokens(input_ids)
        p_tokens, h_tokens = split_p_h_with_input_ids(tokens, input_ids)
        p_score, h_score = split_p_h_with_input_ids(explain, input_ids)
        p_score = normalize(p_score)
        h_score = normalize(h_score)

        p_row = [Cell("P:")] + cells_from_tokens(p_tokens, p_score)
        h_row = [Cell("H:")] + cells_from_tokens(h_tokens, h_score)

        html.write_paragraph(str(logit))
        html.multirow_print(p_row)
        html.multirow_print(h_row)

        # The limit is checked after printing, before the counter advances.
        if seen > 100:
            break
        seen += 1
def dev():
    """Write 100 random training instances and their dictionary text to HTML.

    For each sampled batch of size 1, the input tokens are printed under an
    "Input" headline (positions listed in ``d_location_ids`` are bolded and
    "[unused3]" is shown as a separator), then the dictionary tokens are
    printed under a "Dict def" headline with "[unused5]" shown as a line
    break. Output goes to "nli_w_dict.html".
    """
    train_data_feeder = load_cache("train_data_feeder")
    tokenizer = tokenizer_wo_tf.FullTokenizer(
        os.path.join(data_path, "bert_voca.txt"))
    html_writer = HtmlVisualizer("nli_w_dict.html", dark_mode=False)

    try:
        for _ in range(100):
            batch = train_data_feeder.get_random_batch(1)
            (input_ids, input_mask, segment_ids, d_input_ids, d_input_mask,
             d_location_ids, y) = batch

            tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
            for i in range(len(tokens)):
                # Value comparison: `is not 0` tests identity, which is not
                # guaranteed for integers and warns on Python 3.8+.
                # NOTE(review): `d_location_ids` is used as-is, not row 0 of
                # the batch like the id arrays - confirm its shape.
                if i != 0 and i in d_location_ids:
                    tokens[i] = "<b>{}</b>".format(tokens[i])
                if tokens[i] == "[unused3]":
                    tokens[i] = "[SEP]\n"
            s = tokenizer_wo_tf.pretty_tokens(tokens)
            html_writer.write_headline("Input")
            html_writer.write_paragraph(s)

            d_tokens = tokenizer.convert_ids_to_tokens(d_input_ids[0])
            for i in range(len(d_tokens)):
                # The original tested and rewrote `tokens` here, so the
                # replacement never reached the definition text printed below.
                if d_tokens[i] == "[unused5]":
                    d_tokens[i] = "<br>\n"
            s = tokenizer_wo_tf.pretty_tokens(d_tokens)
            html_writer.write_headline("Dict def")
            html_writer.write_paragraph(s)
    finally:
        # Close the report even if a batch fails part-way through.
        html_writer.close()
def print_paragraph_feature(pf_list: List[ParagraphFeature],
                            out_path: FilePath):
    """Write each datapoint's two texts and its feature paragraphs to HTML.

    :param pf_list: features to print; for each one ``datapoint.text1`` and
        ``datapoint.text2`` are written, followed by one paragraph per item
        in ``feature`` containing its space-joined tokens.
    :param out_path: passed to ``HtmlVisualizer``.
    """
    html = HtmlVisualizer(out_path)
    for paragraph_feature in pf_list:
        datapoint = paragraph_feature.datapoint
        html.write_paragraph("Text 1: " + datapoint.text1)
        html.write_paragraph("Text 2: " + datapoint.text2)
        for scored in paragraph_feature.feature:
            html.write_paragraph(" ".join(scored.paragraph.tokens))
    html.close()
def visualize_prediction_data(data_id, max_entries=100):
    """Write an HTML comparison of masked-LM losses for one data shard.

    Reads the per-entry sample counts and the pickled loss outputs for
    ``data_id``, then walks the feature records in lock-step. For every entry
    with more than one sample, a table is written whose first row is the
    first sample's loss and whose remaining rows pair each subsequent loss
    with its definition text; a row is highlighted when its loss is below 90%
    of the first one.

    :param data_id: file name shared by the count file, the loss pickle
        ("entry<data_id>.pickle") and the record file.
    :param max_entries: upper bound on the number of entries visualized
        (the original hard-coded 100).
    """
    tokenizer = get_tokenizer()

    count_path = os.path.join(working_path, "entry_prediction_n", data_id)
    with open(count_path, "r") as count_file:
        num_samples_list = count_file.readlines()

    p = os.path.join(working_path, "entry_loss",
                     "entry{}.pickle".format(data_id))
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open(p, "rb") as loss_file:
        loss_outputs_list = pickle.load(loss_file)
    print("Loaded input data")

    loss_outputs = []
    for e in loss_outputs_list:
        loss_outputs.extend(e["masked_lm_example_loss"])
    print("Total of {} loss outputs".format(len(loss_outputs)))

    instance_idx = 0
    feature_itr = load_record_v2(
        os.path.join(working_path, "entry_prediction_tf.done", data_id))

    # Never index past the end of the count file.
    n = min(len(num_samples_list), max_entries)
    html = HtmlVisualizer("entry_prediction.html")
    for i in range(n):
        n_sample = int(num_samples_list[i])
        assert n_sample > 0

        first_inst = next(feature_itr)
        feature = Feature2Text(first_inst, tokenizer)
        html.write_headline("Input:")
        html.write_paragraph(feature.get_input_as_text(True, True))
        html.write_headline("Word:" + feature.get_selected_word_text())

        if instance_idx + n_sample >= len(loss_outputs):
            break
        if n_sample == 1:
            # NOTE(review): a feature record was consumed above but
            # `instance_idx` is not advanced; if every record has a loss
            # entry this desynchronizes later rows - confirm against the
            # producer of `loss_outputs`.
            continue

        no_dict_loss = loss_outputs[instance_idx]
        rows = [[Cell(no_dict_loss, 0), Cell("")]]
        instance_idx += 1

        for _ in range(1, n_sample):
            feature = Feature2Text(next(feature_itr), tokenizer)
            def_cell = Cell(feature.get_def_as_text())
            loss = loss_outputs[instance_idx]
            hl_score = 100 if loss < no_dict_loss * 0.9 else 0
            rows.append([Cell(loss, hl_score), def_cell])
            instance_idx += 1

        html.write_table(rows)
def show(html_visualizer: HtmlVisualizer,
         features: List[ParagraphClaimPersFeature]):
    """Print claim/perspective pairs and their paragraphs with term overlap.

    For each feature, the claim and perspective texts are written, then every
    paragraph is printed with tokens highlighted (score 100) when their
    lower-cased form occurs among the lower-cased NLTK tokens of the claim or
    the perspective.

    :param html_visualizer: report that receives the output.
    :param features: non-empty list; the first item's claim id is printed.
    """
    print("Cid: ", features[0].claim_pers.cid)

    for feature in features:
        claim_pers = feature.claim_pers
        html_visualizer.write_paragraph("Claim: " + claim_pers.claim_text)
        html_visualizer.write_paragraph("Perspective: " + claim_pers.p_text)

        pc_tokens: List[str] = nltk.word_tokenize(
            claim_pers.claim_text) + nltk.word_tokenize(claim_pers.p_text)
        vocabulary = {tok.lower() for tok in pc_tokens}
        print(vocabulary)

        html_visualizer.write_paragraph("Label : {}".format(
            claim_pers.label))

        for scored in feature.feature:
            row = [
                Cell(tok, 100 if tok.lower() in vocabulary else 0)
                for tok in scored.paragraph.tokens
            ]
            html_visualizer.write_paragraph("---")
            html_visualizer.multirow_print(row, width=20)
def show(all_info):
    """Write scored passages for claim/perspective pairs to "cppnc.html".

    :param all_info: mapping whose values are non-empty sequences of
        ``(score, rel_score, info)`` triples. ``info`` is read with the keys
        'cid', 'c_text', 'p_text' and 'passage'. The first triple supplies the
        claim and perspective headline; every triple gets its own score
        headlines and passage paragraph.

    Iteration stops after the entry that takes the running count above 10000.
    """
    html = HtmlVisualizer("cppnc.html")

    for n_done, entries in enumerate(all_info.values(), start=1):
        _, _, head_info = entries[0]
        html.write_headline("Claim {}: {}".format(head_info['cid'],
                                                  head_info['c_text']))
        html.write_headline("Perspective: " + head_info['p_text'])

        for score, rel_score, info in entries:
            html.write_headline("score: {}".format(score))
            html.write_headline("rel_score: {}".format(rel_score))
            html.write_paragraph(" ".join(info['passage']))

        if n_done > 10000:
            break
def main():
    """Render per-layer probabilities for the "alamri_mismatch_all" dump.

    For each of the first 100 instances a table is written: the tokens
    highlighted by ``ex_scores``, the ``ex_scores`` values themselves
    (highlighted when above 0.5), and one row per layer (12 in total)
    containing column 1 of the sigmoid of that layer's logits.
    """
    output_d = load_from_pickle("alamri_mismatch_all")
    html = HtmlVisualizer("alamri_mismatch.html")
    tokenizer = get_tokenizer()
    per_layer = output_d["per_layer_logits"]
    num_layers = 12

    def labelled_row(head, float_arr):
        # Header cell followed by one two-digit formatted cell per value.
        return [Cell(head)] + lmap(Cell, map(two_digit_float, float_arr))

    num_data = len(output_d['input_ids'])
    for data_idx in range(num_data)[:100]:
        tokens = tokenizer.convert_ids_to_tokens(
            output_d["input_ids"][data_idx])
        ex_scores = output_d['ex_scores'][data_idx]
        probs = scipy.special.softmax(output_d['logits'][data_idx])

        html.write_paragraph(
            "Prediction: {}".format(make_prediction_summary_str(probs)))
        html.write_paragraph(
            "gold label={}".format(output_d["label"][data_idx]))

        token_row = [Cell("")]
        for tok, s in zip(tokens, ex_scores):
            token_row.append(Cell(tok, int(s * 100)))

        score_row = labelled_row("ex_prob", ex_scores)
        for i, s in enumerate(ex_scores):
            if s > 0.5:
                # +1 skips the leading header cell.
                score_row[i + 1].highlight_score = 100

        rows = [token_row, score_row]
        for layer_no in range(num_layers):
            layer_probs = sigmoid(per_layer[layer_no][data_idx])
            rows.append(
                labelled_row("layer_{}".format(layer_no), layer_probs[:, 1]))
        html.write_table(rows)
def print_as_html(fn):
    """Print the "input_ids" of every record in ``fn`` as token rows.

    The ids of each record are processed in consecutive windows of 512 and
    each window is printed with ``multirow_print``; a dashed paragraph
    separates records. Output goes to "out_name.html".
    """
    examples = load_record(fn)
    vocab_path = os.path.join(data_path, "bert_voca.txt")
    tokenizer = tokenizer_wo_tf.FullTokenizer(vocab_path)
    html_output = HtmlVisualizer("out_name.html")
    window = 512

    for feature in examples:
        masked_inputs = feature["input_ids"].int64_list.value
        for start in range(0, len(masked_inputs), window):
            chunk = masked_inputs[start:start + window]
            tokens = tokenizer.convert_ids_to_tokens(chunk)
            html_output.multirow_print(cells_from_tokens(tokens))
        html_output.write_paragraph("----------")
def show_prediction(filename, file_path, correctness_1, correctness_2):
    """Show the instances that at least one of two runs got wrong.

    Prediction entries from ``filename`` are walked in lock-step with the
    records in ``file_path`` (their input ids are asserted equal). When either
    correctness flag for an instance is falsy, the label, both flags, and the
    two token rows (highlighted by ``alt_emb_mask``) are written to
    ``<basename of filename>.html``.

    :param correctness_1: per-instance flags, indexed by position.
    :param correctness_2: per-instance flags, indexed by position.
    """
    def masked_cells(segment_tokens, segment_mask):
        return [
            Cell(tok, 100 if segment_mask[pos] else 0)
            for pos, tok in enumerate(segment_tokens)
        ]

    data = EstimatorPredictionViewerGosford(filename)
    records = load_record_v2(file_path)
    tokenizer = get_tokenizer()
    html = HtmlVisualizer(os.path.basename(filename) + ".html")

    for idx, entry in enumerate(data):
        features = next(records)
        input_ids = entry.get_vector("input_ids")
        input_ids2 = take(features["input_ids"])
        assert np.all(input_ids == input_ids2)

        alt_emb_mask = take(features["alt_emb_mask"])
        tokens = tokenizer.convert_ids_to_tokens(input_ids)
        p_tokens, h_tokens = split_p_h_with_input_ids(tokens, input_ids)
        p_mask, h_mask = split_p_h_with_input_ids(alt_emb_mask, input_ids)
        p_cells = masked_cells(p_tokens, p_mask)
        h_cells = masked_cells(h_tokens, h_mask)

        label = take(features["label_ids"])[0]
        # The prediction is computed but not shown in the report.
        logits = entry.get_vector("logits")
        pred = np.argmax(logits)

        if correctness_1[idx] and correctness_2[idx]:
            continue
        html.write_paragraph("Label : {} Correct: {}/{}".format(
            label, correctness_1[idx], correctness_2[idx]))
        html.write_table([p_cells])
        html.write_table([h_cells])
def per_doc_score():
    """Visualize the per-token "layer_count" of the first 101 instances.

    Tokens are written to "preserved.html" in tables of up to 20 columns;
    each table has the token row and a second row with the raw count, both
    highlighted by ``count / 728 * 100``. The instance's average count is
    printed to stdout and written after its tables.

    Changes from the original: the trailing row of fewer than 20 tokens is now
    written instead of being dropped, and the unused direct pickle load
    (which left a file handle open) is gone.
    """
    filename = "fetch_hidden_dim.pickle"
    html_writer = HtmlVisualizer("preserved.html", dark_mode=False)
    n_skip = 0
    row_width = 20
    data = EstimatorPredictionViewerGosford(filename)

    for inst_i, entry in enumerate(data):
        if inst_i > 100:
            break
        count_preserved = entry.get_vector("layer_count")
        tokens = entry.get_tokens("input_ids")
        cells = data.cells_from_tokens(tokens)
        avg = np.average(count_preserved)

        # Filter toggle; an alternative used during analysis was `avg > 20`.
        f_print = True
        print(avg)
        if not f_print:
            n_skip += 1
            continue

        html_writer.write_paragraph("Skipped {} articles".format(n_skip))
        n_skip = 0

        token_row = []
        count_row = []
        for idx, cell in enumerate(cells):
            # NOTE(review): 728 is the original divisor; possibly meant to be
            # a hidden size of 768 - confirm before changing.
            score = count_preserved[idx] / 728 * 100
            cell.highlight_score = score
            token_row.append(cell)
            count_row.append(Cell(count_preserved[idx], score))

        # Slicing emits the final partial row too.
        for start in range(0, len(token_row), row_width):
            stop = start + row_width
            html_writer.write_table(
                [token_row[start:stop], count_row[start:stop]])

        html_writer.write_paragraph(str(avg))
def main():
    """Compare window scores of two window sizes and render the difference.

    For the selected job, predictions made with a 128-token window and with a
    112-token window are paired per document. For every pair of segments that
    end at the same position, the tokens covered only by the larger window
    are scored with ``score - score2`` and written to "windowed.html",
    coloured "B" when the difference is positive and "R" otherwise.

    Fix over the original: the colour was derived from the absolute
    (non-negative) highlight value, so it did not reflect the sign of the
    difference; it now uses the signed difference.
    """
    n_factor = 16
    step_size = 16
    max_seq_length = 128
    max_seq_length2 = 128 - 16
    batch_size = 8

    info_file_path = at_output_dir("robust", "seg_info")
    queries = load_robust_04_query("desc")
    qid_list = get_robust_qid_list()

    f_handler = get_format_handler("qc")
    info: Dict = load_combine_info_jsons(info_file_path,
                                         f_handler.get_mapping(),
                                         f_handler.drop_kdp())
    print(len(info))
    tokenizer = get_tokenizer()

    for job_idx in [1]:
        qid = qid_list[job_idx]
        query = queries[str(qid)]
        q_term_length = len(tokenizer.tokenize(query))
        data_path1 = os.path.join(output_path, "robust",
                                  "windowed_{}.score".format(job_idx))
        data_path2 = os.path.join(output_path, "robust",
                                  "windowed_small_{}.score".format(job_idx))
        data1 = OutputViewer(data_path1, n_factor, batch_size)
        data2 = OutputViewer(data_path2, n_factor, batch_size)
        # 3 is subtracted along with the query length; presumably for special
        # tokens - TODO confirm.
        segment_len = max_seq_length - 3 - q_term_length
        segment_len2 = max_seq_length2 - 3 - q_term_length

        outputs = []
        for d1, d2 in zip(data1, data2):
            # for each query, doc pairs
            cur_info1 = info[d1['data_id']]
            cur_info2 = info[d2['data_id']]
            query_doc_id1 = f_handler.get_pair_id(cur_info1)
            query_doc_id2 = f_handler.get_pair_id(cur_info2)
            assert query_doc_id1 == query_doc_id2

            doc = d1['doc']
            probs = get_probs(d1['logits'])
            probs2 = get_probs(d2['logits'])
            n_pred_true = np.count_nonzero(np.less(0.5, probs))
            print(n_pred_true, len(probs))

            seg_scores: List[Tuple[int, int, float]] = get_piece_scores(
                n_factor, probs, segment_len, step_size)
            seg_scores2: List[Tuple[int, int, float]] = get_piece_scores(
                n_factor, probs2, segment_len2, step_size)

            ss_list = []
            for st, ed, score in seg_scores:
                try:
                    st2, ed2, score2 = find_where(lambda x: x[1] == ed,
                                                  seg_scores2)
                except StopIteration:
                    # No small-window segment ends at this position.
                    continue
                assert ed == ed2
                assert st < st2
                tokens = tokenizer.convert_ids_to_tokens(doc[st:st2])
                diff = score - score2
                ss_list.append(ScoredPiece(st, st2, diff, tokens))
            outputs.append((probs, probs2, query_doc_id1, ss_list))

        html = HtmlVisualizer("windowed.html")
        for probs, probs2, query_doc_id, ss_list in outputs:
            html.write_paragraph(str(query_doc_id))
            html.write_paragraph("Query: " + query)

            ss_list.sort(key=lambda ss: ss.st)
            prev_end = None
            cells = []
            prob_str1 = lmap(two_digit_float, probs)
            # Placeholder entry prepended to the first row of probabilities.
            prob_str1 = ["8.88"] + prob_str1
            prob_str2 = lmap(two_digit_float, probs2)
            html.write_paragraph(" ".join(prob_str1))
            html.write_paragraph(" ".join(prob_str2))

            for ss in ss_list:
                if prev_end is not None:
                    # Pieces must tile the document without gaps.
                    assert prev_end == ss.st
                else:
                    print(ss.st)

                score = abs(int(100 * ss.score))
                # Colour by the signed difference; `score` is never negative.
                color = "B" if ss.score > 0 else "R"
                cells.extend(
                    [Cell(t, score, target_color=color) for t in ss.tokens])
                prev_end = ss.ed

            html.multirow_print(cells)
def join_docs_and_lm():
    """Show top-ranked documents per claim, highlighted by claim log-odds.

    For the first 10 training claims, the gold perspective clusters are
    listed, the 30 highest log-odds terms of the claim's smoothed language
    model (relative to the averaged background model) are printed, and up to
    ``top_n`` ranked documents are rendered with ``get_cell_from_token``
    using each token's log-odds. Stemmed tokens missing from the log-odds
    table are reported per claim. Output goes to "doc_lm_joined.html".

    Changes from the original: the smoothed model is computed once instead of
    twice, and ranked lists shorter than ``top_n`` no longer raise
    ``IndexError``.
    """
    gold = get_claim_perspective_id_dict()

    d_ids = list(load_train_claim_ids())
    claims: List[Dict] = get_claims_from_ids(d_ids)
    claims = claims[:10]
    top_n = 10
    q_res_path = FilePath(
        "/mnt/nfs/work3/youngwookim/data/perspective/train_claim/q_res_100")
    ranked_list: Dict[
        str, List[SimpleRankedListEntry]] = load_galago_ranked_list(q_res_path)
    preload_docs(ranked_list, claims, top_n)
    claim_lms = build_gold_lms(claims)
    claim_lms_d = {lm.cid: lm for lm in claim_lms}
    bg_lm = average_counters(lmap(lambda x: x.LM, claim_lms))
    log_bg_lm = get_lm_log(bg_lm)

    # Note: this mutates the shared `stopwords` collection.
    stopwords.update([".", ",", "!", "?"])

    alpha = 0.1

    html_visualizer = HtmlVisualizer("doc_lm_joined.html")

    def get_cell_from_token2(token, probs):
        # Alternative cell builder (probability-based); currently unused.
        if token.lower() in stopwords:
            probs = 0
        probs = probs * 1e5
        s = min(100, probs)
        c = Cell(token, s)
        return c

    tokenizer = PCTokenizer()
    for c in claims:
        q_res: List[SimpleRankedListEntry] = ranked_list[str(c['cId'])]
        html_visualizer.write_headline("{} : {}".format(c['cId'], c['text']))

        clusters: List[List[int]] = gold[c['cId']]
        for cluster in clusters:
            html_visualizer.write_paragraph("---")
            p_text_list: List[str] = lmap(perspective_getter, cluster)
            for text in p_text_list:
                html_visualizer.write_paragraph(text)
            html_visualizer.write_paragraph("---")

        claim_lm = claim_lms_d[c['cId']]
        topic_lm_prob = smooth(claim_lm.LM, bg_lm, alpha)
        log_topic_lm = get_lm_log(topic_lm_prob)
        log_odd: Counter = subtract(log_topic_lm, log_bg_lm)

        s = "\t".join(left(log_odd.most_common(30)))
        html_visualizer.write_paragraph("Log odd top: " + s)
        not_found = set()

        def get_log_odd(x):
            x = tokenizer.stemmer.stem(x)
            if x not in log_odd:
                not_found.add(x)
            return log_odd[x]

        def get_probs(x):
            # Probability-based counterpart of get_log_odd; currently unused.
            x = tokenizer.stemmer.stem(x)
            if x not in topic_lm_prob:
                not_found.add(x)
            return topic_lm_prob[x]

        # Slicing tolerates ranked lists with fewer than top_n entries.
        for i, ranked_entry in enumerate(q_res[:top_n]):
            try:
                doc = load_doc(ranked_entry.doc_id)
                cells = lmap(lambda x: get_cell_from_token(x, get_log_odd(x)),
                             doc)
                html_visualizer.write_headline("Doc rank {}".format(i))
                html_visualizer.multirow_print(cells, width=20)
            except KeyError:
                # Best effort: skip this document on a KeyError.
                pass
        html_visualizer.write_paragraph("Not found: {}".format(not_found))
def write_deletion_score_to_html(out_file_name, summarized_table: List[Entry],
                                 info: Dict[int, Dict]):
    """Write a token-deletion report for every entry of ``summarized_table``.

    For each entry the five lowest contribution scores are tabulated, the
    token with the most negative contribution is reported together with the
    prediction obtained for that case, and both claims are rendered with
    tokens highlighted in proportion to their (negative) contribution.

    Changes from the original: entries without any contribution, or without
    any negative contribution, no longer crash (``drops[0]`` on an empty list,
    softmax of ``None``); the "max drop" lines are omitted for them.

    :param out_file_name: passed to ``HtmlVisualizer``.
    :param summarized_table: entries providing ``input_ids``,
        ``contribution``, ``case_logits_d``, ``base_logits`` and ``data_id``.
    :param info: looked up with ``str(entry.data_id[0])``; values provide
        'text1' and 'text2'. NOTE(review): the annotation says ``int`` keys
        but the lookup uses ``str`` - confirm.
    """
    text_to_info = claim_text_to_info()
    html = HtmlVisualizer(out_file_name)
    tokenizer = get_biobert_tokenizer()
    num_print = 0
    for entry in summarized_table:
        tokens = tokenizer.convert_ids_to_tokens(entry.input_ids)
        idx_sep1, idx_sep2 = get_sep_loc(entry.input_ids)
        cells = cells_from_tokens(tokens)

        # Pass 1: collect (index, score) for scored tokens, lowest first.
        drops = []
        for idx, token in enumerate(tokens):
            if token == "[PAD]":
                break
            if token == '[SEP]':
                continue
            if idx in entry.contribution:
                drops.append((idx, entry.contribution[idx]))
        drops.sort(key=get_second)
        # Only needed when at least one score is negative, so None is safe.
        largest_drop = drops[0][1] if drops else None

        # Pass 2: colour the cells and locate the most negative contribution.
        max_drop = 0
        max_drop_idx = -1
        max_drop_case_logit = None
        for idx, token in enumerate(tokens):
            if token == "[PAD]":
                break
            if token == '[SEP]':
                continue
            if idx in entry.contribution:
                raw_score = entry.contribution[idx]
                if max_drop > raw_score:
                    max_drop = raw_score
                    max_drop_idx = idx
                    max_drop_case_logit = entry.case_logits_d[idx]

                if raw_score < 0:
                    # A negative score implies largest_drop is negative too.
                    score = abs(raw_score / largest_drop) * 200
                else:
                    score = 0
                color = "B"
            else:
                score = 150
                color = "Gray"
            cells[idx].highlight_score = score
            cells[idx].target_color = color

        base_probs = scipy.special.softmax(entry.base_logits)
        info_entry = info[str(entry.data_id[0])]
        claim1_info: Dict = text_to_info[info_entry['text1']]
        claim2_info: Dict = text_to_info[info_entry['text2']]
        question = claim1_info['question']
        assertion1 = claim1_info['assertion']
        assertion2 = claim2_info['assertion']
        original_prediction_summary = make_prediction_summary_str(base_probs)
        html.write_bar()
        html.write_paragraph("Question: {}".format(question))
        html.write_paragraph("Original prediction: " +
                             original_prediction_summary)
        html.write_paragraph("Max drop")

        rows = []
        for idx, score in drops[:5]:
            rows.append([Cell(str(idx)), Cell(tokens[idx]), Cell(score)])
        html.write_table(rows)

        if max_drop_idx >= 0:
            min_token = tokens[max_drop_idx]
            html.write_paragraph("> \"{}\": {} ".format(min_token, max_drop))
            max_drop_case_prob = scipy.special.softmax(max_drop_case_logit)
            max_drop_prediction_summary = make_prediction_summary_str(
                max_drop_case_prob)
            html.write_paragraph("> " + max_drop_prediction_summary)

        # Position 0 and the separator positions are left out of the rows.
        p = [Cell("Claim1 ({}):".format(assertion1))] + cells[1:idx_sep1]
        h = [Cell("Claim2 ({}):".format(assertion2))
             ] + cells[idx_sep1 + 1:idx_sep2]
        html.write_table([p])
        html.write_table([h])
        num_print += 1

    print("printed {} of {}".format(num_print, len(summarized_table)))
def main():
    """Render pair-deletion effect tables for the "alamri_pair" results.

    For every (p, h) text pair, the base prediction is printed, followed by
    two tables (rows: h tokens, columns: p tokens). The first shows the
    p-component and the second the h-component of ``effect_d`` for each
    (p_idx, h_idx) key; every cell carries a tooltip of the scores after
    deleting only the p token and after deleting both tokens.
    """
    info_entries, output_d = load_from_pickle("alamri_pair")
    html = HtmlVisualizer("alamri_pairing_deletion.html", use_tooltip=True)
    initial_text = load_p_h_pair_text(
        at_output_dir("alamri_pilot", "true_pair_small.csv"))
    per_group_summary: List[PerGroupSummary] = summarize_pair_deletion_results(
        info_entries, output_d)

    def format_scores(float_arr):
        return list(map(two_digit_float, float_arr))

    num_data = len(output_d['input_ids'])
    for data_idx, (p, h) in enumerate(initial_text):
        summary = per_group_summary[data_idx]
        p_tokens = p.split()
        h_tokens = h.split()

        # The (-1, -1) key holds the score with nothing deleted.
        pred_str = make_prediction_summary_str(summary.score_d[(-1, -1)])
        html.write_paragraph("Prediction: {}".format(pred_str))

        keys = list(summary.score_d.keys())
        p_idx_max = max(left(keys))
        h_idx_max = max(right(keys))

        def p_effect(key):
            p_score, h_score = summary.effect_d[key]
            return p_score

        def h_effect(key):
            p_score, h_score = summary.effect_d[key]
            return h_score

        def build_table(effect_at):
            table = [[Cell("")] + [Cell(t) for t in p_tokens]]
            for h_idx in range(h_idx_max + 1):
                line = [Cell(h_tokens[h_idx])]
                for p_idx in range(p_idx_max + 1):
                    effect = effect_at((p_idx, h_idx))
                    one_del_score = summary.score_d[(p_idx, -1)]
                    two_del_score = summary.score_d[(p_idx, h_idx)]
                    tooltip_str = "{} -> {}".format(
                        format_scores(one_del_score),
                        format_scores(two_del_score))
                    line.append(
                        get_tooltip_cell(two_digit_float(effect),
                                         tooltip_str))
                table.append(line)
            return table

        html.write_table(build_table(p_effect))
        html.write_table(build_table(h_effect))
        html.write_bar()
def do():
    """Visualize prob1/prob2 per token for two runs and plot their relation.

    For each (prediction file, record file, output name) triple, up to 200
    entries are rendered as 4-row tables (token, prob1, prob2, usefulness) of
    at most 21 columns each, followed by the per-entry averages. All
    (prob1, prob2) pairs are collected; afterwards up to 1000 randomly chosen
    pairs are fitted with a degree-4 polynomial regression and plotted.

    Changes from the original: the last partial table of every entry is now
    written; dead assignments, an unused linear fit and a meaningless
    ``poly.fit(X_poly, y)`` are removed; the sample is sorted by x so the
    fitted curve is drawn left to right; an empty sample no longer crashes.
    """
    todo = [
        ("RLPP_0.pickle", "C:\\work\\Code\\Chair\\output\\unmasked_pair_x3_0",
         "RLPP_wiki.html"),
        ("ukp_rel.pickle", "C:\\work\\Code\\Chair\\output\\tf_enc",
         "RLPP_ukp.html")
    ]
    max_cols = 21  # the original flushed once a row exceeded 20 cells
    x = []
    y = []
    for pred_file_name, record_file_name, out_name in todo:
        viewer = EstimatorPredictionViewerGosford(pred_file_name)
        html = HtmlVisualizer(out_name)
        itr1 = load_record_v2(record_file_name)
        itr2 = viewer.__iter__()
        cnt = 0
        for features, entry in zip(itr1, itr2):
            cnt += 1
            if cnt > 200:
                break
            input_ids1 = entry.get_tokens("input_ids")
            prob1 = entry.get_vector("prob1")
            prob2 = entry.get_vector("prob2")

            cells = viewer.cells_from_tokens(input_ids1)
            p1_l = []
            p2_l = []
            useful_l = []

            row1 = []
            row2 = []
            row3 = []
            row4 = []
            for j, cell in enumerate(cells):
                p1 = float(prob1[j])
                p2 = float(prob2[j])
                x.append([p1])
                y.append(p2)
                u = useful(p1, p2)
                score = (1 - u) * 100
                cell.highlight_score = score
                row1.append(cell)
                row2.append(Cell(p1, score))
                row3.append(Cell(p2, score))
                row4.append(Cell(u, score))
                p1_l.append(p1)
                p2_l.append(p2)
                useful_l.append(u)

            # Slicing also emits the final, shorter table.
            for start in range(0, len(row1), max_cols):
                stop = start + max_cols
                html.write_table([row1[start:stop], row2[start:stop],
                                  row3[start:stop], row4[start:stop]])

            html.write_paragraph("p1: {}".format(average(p1_l)))
            html.write_paragraph("p2: {}".format(average(p2_l)))
            html.write_paragraph("useful: {}".format(average(useful_l)))

            if average(useful_l) < 0.4:
                html.write_headline("Low Score")

    samples = list(zip(x, y))
    if not samples:
        # Nothing was collected, so there is nothing to fit or plot.
        return
    random.shuffle(samples)
    samples = samples[:1000]
    # A line plot connects points in the order given; sort by x first.
    samples.sort(key=lambda pair: pair[0][0])
    x, y = zip(*samples)

    poly = PolynomialFeatures(degree=4)
    X_poly = poly.fit_transform(x)
    lin2 = LinearRegression()
    lin2.fit(X_poly, y)
    plt.scatter(x, y, color='blue')

    plt.plot(x, lin2.predict(X_poly), color='red')
    plt.title('Polynomial Regression')

    plt.show()
def analyze_gradient(data, tokenizer):
    """Visualize gradient magnitude on dictionary tokens per masked position.

    Gradients are reshaped with ``reshape_gradienet`` and summed in absolute
    value over the last axis. For each instance the masked input is printed,
    followed by a table with one row per prediction slot (sorted by masked
    position) whose cells are the dictionary tokens highlighted by
    ``gradient / (hidden_dim * 2)``. A "HIGH Gradient" headline is written
    when any slot's total exceeds 5,000,000. Output goes to "dict_grad.html".

    Fix over the original: ``cont_right`` checked the current token for a
    "##" prefix; it now checks the following token. Unused locals were
    removed.

    :param data: mapping with 'gradients', 'd_input_ids', 'masked_input_ids'
        and 'masked_lm_positions'; the two id arrays must be 2-D with the
        same first dimension.
    :param tokenizer: object providing ``convert_ids_to_tokens``.
    """
    gradients = data['gradients']
    d_input_ids = data['d_input_ids']
    mask_input_ids = data['masked_input_ids']
    masked_lm_positions = data["masked_lm_positions"]

    n_inst, seq_len = mask_input_ids.shape
    n_inst2, def_len = d_input_ids.shape

    assert n_inst == n_inst2

    # NOTE(review): the definition length from the data is overridden by a
    # fixed 256, as in the original - confirm it matches the gradient layout.
    def_len = 256
    hidden_dim = 768
    reshaped_grad = reshape_gradienet(gradients, n_inst, def_len, hidden_dim)
    print(reshaped_grad.shape)

    n_pred = reshaped_grad.shape[1]

    grad_per_token = np.sum(np.abs(reshaped_grad), axis=3)

    html_writer = HtmlVisualizer("dict_grad.html", dark_mode=False)

    for inst_idx in range(n_inst):
        tokens = tokenizer.convert_ids_to_tokens(mask_input_ids[inst_idx])
        for i in range(len(tokens)):
            if tokens[i] == "[MASK]":
                tokens[i] = "[MASK_{}]".format(i)
            if tokens[i] == "[SEP]":
                tokens[i] = "[SEP]<br>"
        def_tokens = tokenizer.convert_ids_to_tokens(d_input_ids[inst_idx])
        s = tokenizer_wo_tf.pretty_tokens(tokens)

        lines = []

        grad_total_max = 0
        for pred_idx in range(n_pred):
            row = []
            total = sum(grad_per_token[inst_idx, pred_idx])
            mask_pos = masked_lm_positions[inst_idx, pred_idx]

            if total > grad_total_max:
                grad_total_max = total

            row.append(Cell(mask_pos))
            row.append(Cell(int(total)))

            for def_idx in range(def_len):
                term = def_tokens[def_idx]
                # A word piece continues to the right when the next token
                # starts with "##".
                cont_right = (def_idx + 1 < len(def_tokens)
                              and def_tokens[def_idx + 1][:2] == "##")
                cont_left = term[:2] == "##"

                if term == "[PAD]":
                    break
                if term == "[unused5]":
                    term = "[\\n]"

                score = grad_per_token[inst_idx, pred_idx, def_idx] / (
                    hidden_dim * 2)
                row.append(Cell(term, score, not cont_left, not cont_right))
                print("{}({})".format(
                    term, grad_per_token[inst_idx, pred_idx, def_idx]),
                      end=" ")

            lines.append((mask_pos, row))
            print("")
        lines.sort(key=lambda x: x[0])

        s = s.replace("[unused4]", "<b>DictTerm</b>")
        html_writer.write_paragraph(s)

        if grad_total_max > 5000000:
            html_writer.write_headline("HIGH Gradient")

        rows = right(lines)
        html_writer.write_table(rows)

        print("----------")
    html_writer.close()
def main():
    """Render token-level ablation scores for Robust04 "desc" predictions.

    Documents are taken from ``token_score_by_ablation``. Low-scoring
    documents (max score below 0.6) are only admitted while they do not
    outnumber high-scoring ones, and selection stops once more than 500
    documents have been admitted. For each admitted document whose score
    length fits the token length, every token is shown with a tooltip of its
    per-segment score differences and a highlight derived from their
    average. Output goes to "robust_desc_128_step16_2.html".
    """
    prediction_file_path = at_output_dir("robust", "rob_dense2_pred.score")
    info_file_path = at_job_man_dir1("robust_predict_desc_128_step16_2_info")
    queries: Dict[str, str] = load_robust_04_query("desc")
    tokenizer = get_tokenizer()
    query_token_len_d = {
        qid: len(tokenizer.tokenize(q_text))
        for qid, q_text in queries.items()
    }
    step_size = 16
    window_size = 128
    out_entries: List[AnalyzedDoc] = token_score_by_ablation(
        info_file_path, prediction_file_path, query_token_len_d, step_size,
        window_size)

    qrel_path = "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt"
    judgement_d = load_qrels_structured(qrel_path)

    html = HtmlVisualizer("robust_desc_128_step16_2.html", use_tooltip=True)

    tprint("loading tokens pickles")
    tokens_d: Dict[str, List[str]] = load_pickle_from(
        os.path.join(sydney_working_dir, "RobustPredictTokens3", "1"))
    tprint("Now printing")

    def transform(x):
        # Cubic curve through (0, 0) used to shape the highlight strength.
        return 3 * (math.pow(x - 0.5, 3) + math.pow(0.5, 3))

    n_printed = 0
    n_pos = 0
    n_neg = 0
    for doc_entry in out_entries:
        max_score: float = max(
            lmap(SegmentScorePair.get_max_score,
                 flatten(doc_entry.token_info.values())))

        is_low = max_score < 0.6
        if is_low and n_neg > n_pos:
            # Keep low-scoring documents from outnumbering the others.
            continue
        if is_low:
            n_neg += 1
        else:
            n_pos += 1

        n_printed += 1
        if n_printed > 500:
            break

        doc_tokens: List[str] = tokens_d[doc_entry.doc_id]
        score_len = max(doc_entry.token_info.keys()) + 1
        judgement: Dict[str, int] = judgement_d[doc_entry.query_id]
        label = judgement[doc_entry.doc_id]

        n_tokens = len(doc_tokens)
        if not n_tokens <= score_len < n_tokens + window_size:
            print("doc length : ", len(doc_tokens))
            print("score len:", score_len)
            print("doc length +step_size: ", len(doc_tokens) + step_size)
            continue

        q_text = queries[doc_entry.query_id]
        html.write_paragraph("qid: " + doc_entry.query_id)
        html.write_paragraph("q_text: " + q_text)
        html.write_paragraph("Pred: {0:.2f}".format(max_score))
        html.write_paragraph("Label: {0:.2f}".format(label))

        row = []
        for idx in range(score_len):
            # Positions past the end of the document get a placeholder.
            token = doc_tokens[idx] if idx < len(doc_tokens) else '[-]'
            token_info: List[SegmentScorePair] = doc_entry.token_info[idx]
            full_scores: List[float] = lmap(SegmentScorePair.get_score_diff,
                                            token_info)
            full_score_str = " ".join(lmap(two_digit_float, full_scores))

            # 1 ~ -1
            score = average(full_scores)
            color = "B" if score > 0 else "R"
            normalized_score = transform(abs(score)) * 200

            cell = get_tooltip_cell(token, full_score_str)
            cell.highlight_score = normalized_score
            cell.target_color = color
            row.append(cell)

        html.multirow_print(row, 16)
def binary_feature_demo(datapoint_list):
    """Show retrieved passages with an IDF-overlap "matched" feature.

    For up to the first 12 datapoints, passages are retrieved through
    ``PassageRankedListInterface``. For each passage (up to 11 successfully
    loaded per datapoint) the IDF mass of claim/perspective terms found in
    the document is compared with the total IDF mass; a passage counts as
    matched above 75%. A BM25 re-estimate and the passage tokens (mentioned
    terms highlighted) are printed, followed by a summary table truncated at
    the last matched passage. Output goes to "pc_binary_feature.html".
    """
    ci = PassageRankedListInterface(make_passage_query, Q_CONFIG_ID_BM25)
    not_found_set = set()
    _, clue12_13_df = load_clueweb12_B13_termstat()
    cdf = 50 * 1000 * 1000
    html = HtmlVisualizer("pc_binary_feature.html")

    def lowered_cp_tokens(claim_text, perspective_text):
        # Lower-cased NLTK tokens of claim followed by perspective.
        words = nltk.word_tokenize(claim_text) + nltk.word_tokenize(
            perspective_text)
        return lmap(lambda w: w.lower(), words)

    def idf_scorer(doc, claim_text, perspective_text):
        cp_tokens = set(lowered_cp_tokens(claim_text, perspective_text))
        mentioned_terms = lfilter(lambda t: t in doc, cp_tokens)
        mentioned_terms = re_tokenize(mentioned_terms)

        def idf(term):
            if term not in clue12_13_df:
                if term in string.printable:
                    return 0
                not_found_set.add(term)
            return math.log((cdf + 0.5) / (clue12_13_df[term] + 0.5))

        score = sum(lmap(idf, mentioned_terms))
        max_score = sum(lmap(idf, cp_tokens))
        return score, max_score, mentioned_terms

    def bm25_estimator(doc: Counter, claim_text: str, perspective_text: str):
        cp_tokens = lowered_cp_tokens(claim_text, perspective_text)
        k1 = 0

        def BM25_3(f, qf, df, N, dl, avdl) -> float:
            K = compute_K(dl, avdl)
            first = math.log((N - df + 0.5) / (df + 0.5))
            second = ((k1 + 1) * f) / (K + f)
            return first * second

        dl = sum(doc.values())
        info = []
        for q_term in set(cp_tokens):
            if q_term not in doc:
                continue
            term_score = BM25_3(doc[q_term], 0, clue12_13_df[q_term], cdf,
                                dl, 1200)
            info.append(
                (q_term, doc[q_term], clue12_13_df[q_term], term_score))
        return info

    for dp_idx, x in enumerate(datapoint_list):
        ranked_list: List[GalagoRankEntry] = ci.query_passage(
            x.cid, x.pid, x.claim_text, x.p_text)
        html.write_paragraph(x.claim_text)
        html.write_paragraph(x.p_text)
        html.write_paragraph("{}".format(x.label))

        local_print_cnt = 0
        lines = []
        for ranked_entry in ranked_list:
            try:
                doc_id = ranked_entry.doc_id
                galago_score = ranked_entry.score

                tokens = load_doc(doc_id)
                doc_tf = Counter(tokens)
                score, max_score, mentioned_terms = idf_scorer(
                    doc_tf, x.claim_text, x.p_text)
                matched = score > max_score * 0.75

                def get_cell(token):
                    if token in mentioned_terms:
                        return Cell(token, highlight_score=50)
                    return Cell(token)

                lines.append(
                    [doc_id, galago_score, matched, score, max_score])

                html.write_paragraph("{0} / {1:.2f}".format(
                    doc_id, galago_score))
                html.write_paragraph("{}/{}".format(score, max_score))
                bm25_info = bm25_estimator(doc_tf, x.claim_text, x.p_text)
                bm25_score = sum(lmap(lambda item: item[3], bm25_info))
                html.write_paragraph(
                    "bm25 re-estimate : {}".format(bm25_score))
                html.write_paragraph("{}".format(bm25_info))
                html.multirow_print(lmap(get_cell, tokens))
                local_print_cnt += 1
                if local_print_cnt > 10:
                    break
            except KeyError:
                # Best effort: skip this passage on a KeyError.
                pass

        matched_idx = idx_where(lambda line: line[2], lines)
        if not matched_idx:
            html.write_paragraph("No match")
        else:
            # Keep everything up to and including the last matched passage.
            last_matched = matched_idx[-1]
            lines = lines[:last_matched + 1]
            rows = lmap(lambda line: lmap(Cell, line), lines)
            html.write_table(rows)

        if dp_idx > 10:
            break
def load_bert_like():
    """Summarize attention by relative position for each layer as HTML.

    After loading the checkpoint into a ``BertLike`` model, the attention
    probabilities are fetched once. For each layer, attention from source
    positions 2, 4, ..., 38 to target positions 0..19 is grouped by
    (offset, head); attention to target 0 is collected separately per head.
    A table of per-head averages for offsets -7..6 (groups with fewer than 3
    samples are skipped) and a table of average attention to the first token
    are written to "position.html". Raw values are echoed to stdout.
    """
    disable_eager_execution()
    model = BertLike()
    sess = init_session()
    # A global_variables_initializer run was commented out here originally.
    load_v2_to_v2(sess, get_bert_full_path(), False)

    attention_prob_list, = sess.run([model.attention_probs_list])
    html = HtmlVisualizer("position.html")

    def head_header(first_label):
        # Fresh header row: a label cell followed by Head0..Head11.
        return [Cell(first_label)
                ] + [Cell("Head{}".format(i)) for i in range(12)]

    for layer_no, attention_prob in enumerate(attention_prob_list):
        html.write_headline("Layer {}".format(layer_no))

        acc_dict = {}
        zero_scores = [list() for _ in range(12)]

        for loc in range(2, 40, 2):
            print("Source : ", loc)
            for target_loc in range(20):
                offset = target_loc - loc
                print(offset, end=" ")
                for head_idx in range(num_head):
                    # The bucket is created even if only target 0 is seen.
                    bucket = acc_dict.setdefault((offset, head_idx), [])
                    e = attention_prob[0, head_idx, loc, target_loc]
                    if target_loc == 0:
                        zero_scores[head_idx].append(e)
                    else:
                        bucket.append(e)
                    print("{0:.2f}".format(e * 100), end=" ")
                print()

        rows = [head_header("Loc")]
        for offset in range(-7, +7):
            print(offset, end=" ")
            scores = []
            for head_idx in range(12):
                elems = acc_dict.get((offset, head_idx))
                if elems is None or len(elems) < 3:
                    print("SKIP")
                    continue
                avg = average(elems)
                scores.append(avg)
                print("{0:.2f}".format(avg * 100), end=" ")
            print()
            rows.append(
                [Cell(offset)] +
                [Cell(float(v * 100), v * 1000) for v in scores])
        html.write_table(rows)

        html.write_paragraph("Attention to first token")
        zero_averages = [average(l) for l in zero_scores]
        rows = [
            head_header("   "),
            [Cell("   ")] +
            [Cell(float(v * 100), v * 1000) for v in zero_averages]
        ]
        html.write_table(rows)
def pred_loss_view():
    """Render masked-LM loss-drop predictions to ``pred_make_sense_dev.html``.

    Loads the pickled list of batch dicts at
    ``<output_path>/tlm_loss_pred_on_dev.pickle``, concatenates each key across
    batches along axis 0 and, for at most the first 100 instances, writes:

    * the tokens in tables of 20 cells per row (``[PAD]`` tokens omitted),
      where masked positions are tagged ``[k:token]`` and highlighted
      according to whether ``pred_diff`` / ``gold_diff`` exceed 0.3;
    * a per-masked-position table of probabilities and diffs;
    * the average predicted / observed diff, plus a "High Drop" or
      "Low Drop" headline when the average predicted diff is > 0.3 or < 0.1.

    Compared with the previous version, the final partial row of tokens
    (fewer than 20 cells) is now written instead of being silently dropped,
    and the pickle file is closed after loading.
    """
    tokenizer = get_tokenizer()
    filename = "tlm_loss_pred_on_dev.pickle"
    p = os.path.join(output_path, filename)
    # NOTE(review): pickle.load executes arbitrary code; only use on trusted files.
    with open(p, "rb") as f:
        data = pickle.load(f)

    keys = list(data[0].keys())
    vectors = {
        key: np.concatenate([e[key] for e in data], axis=0) for key in keys
    }

    html_writer = HtmlVisualizer("pred_make_sense_dev.html", dark_mode=False)

    def is_dependent(token):
        # A single non-alphanumeric character attaches to its neighbour
        # without surrounding spaces.
        return len(token) == 1 and not token[0].isalnum()

    def f_cell(obj):
        return Cell("{:04.2f}".format(obj))

    n_instance = min(len(vectors['input_ids']), 100)
    for inst_idx in range(n_instance):
        tokens = tokenizer.convert_ids_to_tokens(
            vectors['input_ids'][inst_idx])
        locations = list(vectors['masked_lm_positions'][inst_idx])

        cells = []
        for i in range(len(tokens)):
            f_same_pred = False
            score = 0
            # Position 0 in `locations` is skipped here and terminates the
            # stats loop below (presumably padding of the position list).
            if i in locations and i != 0:
                i_idx = locations.index(i)
                tokens[i] = "[{}:{}]".format(i_idx, tokens[i])
                pred_diff = vectors['pred_diff'][inst_idx][i_idx]
                gold_diff = vectors['gold_diff'][inst_idx][i_idx]
                pred_label = pred_diff > 0.3
                gold_label = gold_diff > 0.3
                if pred_label:
                    score = 100
                    if gold_label:
                        f_same_pred = True
                elif gold_label:
                    score = 30

            if tokens[i] == "[SEP]":
                tokens[i] = "[SEP]<br>"

            if tokens[i] != "[PAD]":
                term = tokens[i]
                cont_left = term[:2] == "##"
                has_next = i + 1 < len(tokens)
                cont_right = has_next and tokens[i + 1][:2] == "##"
                dependent_right = has_next and is_dependent(tokens[i + 1])
                dependent_left = is_dependent(tokens[i])

                if cont_left:
                    term = term[2:]

                space_left = " " if not (cont_left or dependent_left) else ""
                space_right = " " if not (cont_right or dependent_right) else ""

                if f_same_pred:
                    cells.append(Cell(term, score, space_left, space_right))
                else:
                    cells.append(
                        Cell(term, score, space_left, space_right,
                             target_color="R"))

        row = []
        for cell in cells:
            row.append(cell)
            if len(row) == 20:
                html_writer.write_table([row])
                row = []
        # Flush the remainder; without this the last (<20) tokens were lost.
        if row:
            html_writer.write_table([row])

        row_head = [
            Cell("Index"),
            Cell("P]Prob1"),
            Cell("P]Prob2"),
            Cell("G]Prob1"),
            Cell("G]Prob2"),
            Cell("P]Diff"),
            Cell("G]Diff"),
        ]

        rows = [row_head]
        pred_diff_list = []
        gold_diff_list = []
        for idx, pos in enumerate(locations):
            if pos == 0:
                break
            pred_diff = vectors['pred_diff'][inst_idx][idx]
            gold_diff = vectors['gold_diff'][inst_idx][idx]
            pred_diff_list.append(pred_diff)
            gold_diff_list.append(gold_diff)

            row = [
                Cell(idx),
                f_cell(vectors['prob1'][inst_idx][idx]),
                f_cell(vectors['prob2'][inst_idx][idx]),
                f_cell(math.exp(-vectors['loss_base'][inst_idx][idx])),
                f_cell(math.exp(-vectors['loss_target'][inst_idx][idx])),
                f_cell(pred_diff),
                f_cell(gold_diff),
            ]
            rows.append(row)

        html_writer.write_table(rows)

        pred_diff = np.average(pred_diff_list)
        gold_diff = np.average(gold_diff_list)
        html_writer.write_paragraph(
            "Average Pred diff ={:04.2f} Observed diff={:04.2f} ".format(
                pred_diff, gold_diff))

        if pred_diff > 0.3:
            html_writer.write_headline("High Drop")
        elif pred_diff < 0.1:
            html_writer.write_headline("Low Drop")
def main():
    """Write token-level score heatmaps for high-scoring Robust04 documents.

    Collects per-token scores with ``collect_token_scores`` (window 128,
    step 16), then for entries whose ``max_segment_score()`` is at least 0.6
    writes the query, prediction, relevance label and a tooltip-annotated
    token grid to ``robust_desc_128_step16.html``.  Stops after ten entries
    have been printed.

    Raises:
        IndexError: if an entry's score count is smaller than the document's
            token count, or exceeds it by ``window_size`` or more.
    """
    prediction_file_path = at_output_dir("robust", "rob_dense_pred.score")
    info_file_path = at_job_man_dir1("robust_predict_desc_128_step16_info")
    queries = load_robust_04_query("desc")
    tokenizer = get_tokenizer()
    query_token_len_d = {
        qid: len(tokenizer.tokenize(q_text))
        for qid, q_text in queries.items()
    }

    step_size = 16
    window_size = 128
    out_entries = collect_token_scores(
        info_file_path, prediction_file_path, query_token_len_d,
        step_size, window_size)

    qrel_path = "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt"
    judgement_d = load_qrels_structured(qrel_path)

    html = HtmlVisualizer("robust_desc_128_step16.html", use_tooltip=True)

    tprint("loading tokens pickles")
    tokens_path = os.path.join(sydney_working_dir, "RobustPredictTokens3", "1")
    tokens_d = load_pickle_from(tokens_path)
    tprint("Now printing")

    def transform(x):
        # Cubic curve centred on 0.5; maps 0 -> 0 and 0.5 -> 0.375.
        return 3 * (math.pow(x - 0.5, 3) + math.pow(0.5, 3))

    n_printed = 0
    for entry in out_entries:
        max_score = entry.max_segment_score()
        if max_score < 0.6:
            continue

        n_printed += 1
        if n_printed > 10:
            break

        doc_tokens = tokens_d[entry.doc_id]
        n_doc_tokens = len(doc_tokens)
        score_len = len(entry.scores)
        label = judgement_d[entry.query_id][entry.doc_id]

        too_short = score_len < n_doc_tokens
        too_long = score_len >= n_doc_tokens + window_size
        if too_short or too_long:
            print("doc length : ", n_doc_tokens)
            print("score len:", score_len)
            print("doc length +step_size: ", n_doc_tokens + step_size)
            raise IndexError

        q_text = queries[entry.query_id]
        html.write_paragraph("qid: " + entry.query_id)
        html.write_paragraph("q_text: " + q_text)
        html.write_paragraph("Pred: {0:.2f}".format(max_score))
        html.write_paragraph("Label: {0:.2f}".format(label))

        row = []
        for idx in range(score_len):
            # Scores may extend past the document; show a placeholder there.
            if idx < n_doc_tokens:
                token = doc_tokens[idx]
            else:
                token = '[-]'
            full_score_str = " ".join(
                lmap(two_digit_float, entry.full_scores[idx]))
            cell = get_tooltip_cell(token, full_score_str)
            cell.highlight_score = transform(entry.scores[idx]) * 200
            row.append(cell)

        html.multirow_print(row, 16)
def doc_lm_scoring():
    """Score retrieved passages against per-claim language models and dump HTML.

    For every training claim this builds a log-odds table (smoothed claim LM
    versus the background LM averaged over all claim LMs), scores the passages
    produced by ``iterate_passages`` with the mean log-odds of their stemmed
    tokens, and writes a report to ``doc_lm_doc_level.html``: the top log-odds
    terms, the threshold, top/bottom/random score lines and, when the
    threshold is non-negative, token-highlighted passages.

    A one-line summary is printed to stdout at the end.
    """
    # NOTE(review): `gold` is never read in this function.
    gold = get_claim_perspective_id_dict()
    d_ids = list(load_train_claim_ids())
    claims: List[Dict] = get_claims_from_ids(d_ids)
    # No-op self-assignment (looks like a leftover from a removed slice).
    claims = claims
    top_n = 10
    q_res_path = FilePath(
        "/mnt/nfs/work3/youngwookim/data/perspective/train_claim/q_res_100")
    ranked_list: Dict[
        str, List[SimpleRankedListEntry]] = load_galago_ranked_list(q_res_path)
    preload_docs(ranked_list, claims, top_n)
    claim_lms = build_gold_lms(claims)
    claim_lms_d = {lm.cid: lm for lm in claim_lms}
    # Background LM: average of all per-claim LMs.
    bg_lm = average_counters(lmap(lambda x: x.LM, claim_lms))
    log_bg_lm = get_lm_log(bg_lm)
    # NOTE(review): `stopwords` is never read in this function.
    stopwords = load_stopwords_for_query()
    alpha = 0.5
    html_visualizer = HtmlVisualizer("doc_lm_doc_level.html")
    tokenizer = PCTokenizer()
    # Passages seen for earlier claims; sampled as a random baseline below.
    random_passages = []
    num_pos_sum = 0
    num_pos_exists = 0
    for c in claims:
        q_res: List[SimpleRankedListEntry] = ranked_list[str(c['cId'])]
        html_visualizer.write_headline("{} : {}".format(c['cId'], c['text']))
        # Disabled output of perspective clusters:
        # for cluster in clusters:
        #     html_visualizer.write_paragraph("---")
        #     p_text_list: List[str] = lmap(perspective_getter, cluster)
        #     for text in p_text_list:
        #         html_visualizer.write_paragraph(text)
        #     html_visualizer.write_paragraph("---")
        claim_lm = claim_lms_d[c['cId']]
        # The same smoothing is computed twice: once as probabilities, once
        # as input to the log transform.
        topic_lm_prob = smooth(claim_lm.LM, bg_lm, alpha)
        log_topic_lm = get_lm_log(smooth(claim_lm.LM, bg_lm, alpha))
        log_odd: Counter = subtract(log_topic_lm, log_bg_lm)
        claim_text = c['text']
        claim_tokens = tokenizer.tokenize_stem(claim_text)
        # Threshold = average log-odds of the claim's own tokens that appear
        # in the log-odds table.
        scores = []
        for t in claim_tokens:
            if t in log_odd:
                scores.append(log_odd[t])
        threshold = average(scores)
        # presumably `left` takes the first element (the term) of each
        # (term, value) pair - TODO confirm
        s = "\t".join(left(log_odd.most_common(30)))
        html_visualizer.write_paragraph("Log odd top: " + s)
        # Stemmed terms that were looked up but missing from the tables.
        not_found = set()

        def get_log_odd(x):
            # Log-odds of the stemmed token; records misses in `not_found`.
            x = tokenizer.stemmer.stem(x)
            if x not in log_odd:
                not_found.add(x)
            return log_odd[x]

        # NOTE(review): get_probs is defined but never called here.
        def get_probs(x):
            x = tokenizer.stemmer.stem(x)
            if x not in topic_lm_prob:
                not_found.add(x)
            return topic_lm_prob[x]

        def get_passage_score(p):
            # Mean log-odds over the passage's stemmed tokens; 0 when empty.
            return sum([log_odd[tokenizer.stemmer.stem(t)] for t in p]) / len(p) if len(p) > 0 else 0

        passages = iterate_passages(q_res, top_n, get_passage_score)
        # Each item is indexed as (passage, score); best score first.
        passages.sort(key=lambda x: x[1], reverse=True)
        html_visualizer.write_paragraph("Threshold {}".format(threshold))
        top5_scores = right(passages[:5])
        bot5_scores = right(passages[-5:])
        # Sample only from passages of previously processed claims; the pool
        # is extended with this claim's passages further below.
        if len(random_passages) > 5:
            random_sel_pssages = random.choices(random_passages, k=5)
        else:
            random_sel_pssages = []
        # The random passages are scored with the current claim's log-odds.
        random5_scores = lmap(get_passage_score, random_sel_pssages)

        def score_line(scores):
            return " ".join(lmap(two_digit_float, scores))

        html_visualizer.write_paragraph("top 5: " + score_line(top5_scores))
        html_visualizer.write_paragraph("bot 5: " + score_line(bot5_scores))
        html_visualizer.write_paragraph("random 5: " + score_line(random5_scores))
        num_pos = len(lfilter(lambda x: x[1] > 0, passages))
        num_pos_sum += num_pos
        if num_pos > 0:
            num_pos_exists += 1

        def print_doc(doc, html_visualizer, score):
            # One highlighted cell per token, weighted by its log-odds.
            cells = lmap(lambda x: get_cell_from_token(x, get_log_odd(x)), doc)
            html_visualizer.write_headline("score={}".format(score))
            html_visualizer.multirow_print(cells, width=20)

        random_passages.extend(left(passages))
        # Claims with a negative threshold get no passage dump.
        if threshold < 0:
            continue
        # Passages are sorted descending, so stop at the first negative score.
        for doc, score in passages:
            if score < 0:
                break
            print_doc(doc, html_visualizer, score)
        html_visualizer.write_headline("Bottom 5")
        for doc, score in passages[-5:]:
            print_doc(doc, html_visualizer, score)
    print("{} claims. {} docs on {} claims".format(len(claims), num_pos_sum, num_pos_exists))
def view_grad_overlap():
    """Visualise gradient-overlap scores from ``gradient_overlap_4K.pickle``.

    Pass 1 collects every ``overlap_score`` and, for examples whose
    ``masked_lm_example_loss`` exceeds 1, feeds the loss-normalised score
    into an ``IntBinAverage``.  Pass 2 re-reads the data and, for examples
    with loss > 1, writes the tokens plus a High/Low verdict (normalised
    score versus the average of its loss bin) to ``gradient_overlap_4K.html``.

    The number of examples whose normalised score exceeds 5000 is printed
    at the end.
    """
    filename = "gradient_overlap_4K.pickle"
    out_name = filename.split(".")[0] + ".html"
    html_writer = HtmlVisualizer(out_name, dark_mode=False)

    # ---- pass 1: gather statistics --------------------------------------
    data = EstimatorPredictionViewerGosford(filename)
    iba = IntBinAverage()
    scores = []
    for entry in data:
        loss = entry.get_vector("masked_lm_example_loss")
        score = entry.get_vector("overlap_score")
        if masked_lm_loss_is_large := loss > 1:
            iba.add(loss, score / loss)
        scores.append(score)

    score_avg = average(scores)
    score_std = np.std(scores)
    avg = iba.all_average()

    # Per-bin standard deviation; single-element bins get a sentinel of 999.
    std_dict = {}
    for key, values in iba.list_dict.items():
        bin_std = np.std(values)
        std_dict[key] = 999 if len(values) == 1 else bin_std

    def unlikeliness(value, mean, std):
        return abs(value - mean) / std

    # ---- pass 2: render ---------------------------------------------------
    data = EstimatorPredictionViewerGosford(filename)
    print("num record : ", data.data_len)
    cnt = 0
    for entry in data:
        tokens = entry.get_mask_resolved_input_mask_with_input()
        loss = entry.get_vector("masked_lm_example_loss")
        highlight = lmap(is_mask, tokens)
        score = entry.get_vector("overlap_score")
        print(score)
        cells = data.cells_from_tokens(tokens, highlight)

        if not loss > 1:
            continue

        bin_key = int(loss)
        norm_score = score / loss
        if norm_score > 5000:
            cnt += 1
        expectation = avg[bin_key]

        # The outlier filter is deliberately short-circuited by `or True`.
        is_outlier = unlikeliness(score, score_avg, score_std) > 2
        if is_outlier or True:
            html_writer.multirow_print(cells, 20)
            verdict = "High" if norm_score > expectation else "Low"
            html_writer.write_paragraph(verdict)
            html_writer.write_paragraph("Norm score: " + str(norm_score))
            html_writer.write_paragraph("score: " + str(score))
            html_writer.write_paragraph(
                "masked_lm_example_loss: " + str(loss))
            html_writer.write_paragraph("expectation: " + str(expectation))

    print("number over 5000: ", cnt)
def load_and_visualize():
    """Write per-word lookup losses for dictionary examples to an HTML file.

    Reads, relative to ``output_path``:

    * ``lookup_n/1`` - one integer per line: how many consecutive records
      belong to the same group;
    * ``example_loss.pickle`` - ``data[0]["masked_lm_example_loss"]`` holds
      one loss value per record;
    * ``lookup_example/1`` - the records themselves, via ``load_record_v1``.

    For each group the first record's input is rendered as a paragraph
    (masked tokens shown with their answers, positions listed in
    ``d_location_ids`` replaced by the bolded selected word) followed by a
    table with one row per record: word, loss, definition text.  Output goes
    to ``lookup_loss2.html``.
    """
    tokenizer = tokenizer_wo_tf.FullTokenizer(
        os.path.join(data_path, "bert_voca.txt"))

    data_id = "1"

    with open(os.path.join(output_path, "lookup_n", data_id), "r") as f:
        n_list = f.readlines()
    p = os.path.join(output_path, "example_loss.pickle")
    # NOTE(review): pickle.load executes arbitrary code; only use on trusted files.
    with open(p, "rb") as f:
        data = pickle.load(f)
    data = data[0]["masked_lm_example_loss"]

    feature_itr = load_record_v1(
        os.path.join(output_path, "lookup_example", data_id))

    feature_idx = 0
    html_writer = HtmlVisualizer("lookup_loss2.html", dark_mode=False)

    for n_line in n_list:
        n_sample = int(n_line)
        rows = []
        # `tokens` is only built for the first record of a group, so an empty
        # group would leave it undefined below.
        assert n_sample > 0
        for j in range(n_sample):
            feature = next(feature_itr)

            input_ids = take(feature["input_ids"])
            masked_lm_ids = take(feature["masked_lm_ids"])
            masked_lm_positions = take(feature["masked_lm_positions"])
            selected_word = take(feature["selected_word"])
            d_input_ids = take(feature["d_input_ids"])
            d_location_ids = take(feature["d_location_ids"])

            word_tokens = tokenizer.convert_ids_to_tokens(selected_word)
            word = tokenizer_wo_tf.pretty_tokens((word_tokens))
            emph_word = "<b>" + word + "</b>"

            if j == 0:
                masked_terms = tokenizer.convert_ids_to_tokens(masked_lm_ids)
                mask_ans = dict(zip(list(masked_lm_positions), masked_terms))

                tokens = tokenizer.convert_ids_to_tokens(input_ids)

                for pos in range(len(tokens)):
                    if tokens[pos] == "[MASK]":
                        tokens[pos] = "[MASK_{}: {}]".format(pos, mask_ans[pos])
                    # Position 0 is excluded (value comparison; the previous
                    # `is not 0` identity test is not guaranteed for ints).
                    if pos in d_location_ids and pos != 0:
                        # Collapse a multi-token word: first position shows
                        # the word, following positions show "-".
                        if tokens[pos - 1] != emph_word:
                            tokens[pos] = emph_word
                        else:
                            tokens[pos] = "-"

            def_str = tokenizer_wo_tf.pretty_tokens(
                tokenizer.convert_ids_to_tokens(d_input_ids), True)
            rows.append([
                Cell(word),
                Cell(data[feature_idx]),
                Cell(def_str),
            ])

            feature_idx += 1

        s = tokenizer_wo_tf.pretty_tokens(tokens, True)
        html_writer.write_paragraph(s)

        html_writer.write_table(rows)

    html_writer.close()
def show_analyzed_html(analyzed_failture_cases: List[AnalyzedCase]):
    """Write token-level contradiction scores of analysed cases to HTML.

    For every case a headline is emitted, then its ``score_g`` ("Gold") and
    ``score_p`` ("Pred") lists.  Each list is read as consecutive
    (forward, backward) pairs of ``(sent1, sent2, class_scores, token_scores)``
    tuples; whenever a direction's argmax class is 2 its two sentences are
    written as highlighted token rows.  Output goes to
    ``ca_contradiction_tokens.html``.

    Fix over the previous version: the "<k> of <n>" summary line now shows
    the real count; ``count_cont`` used to return ``None``.

    Args:
        analyzed_failture_cases: cases exposing ``score_g`` and ``score_p``.

    Raises:
        IndexError: if a score list has an odd number of entries.
        AssertionError: if a token-score vector is too short for its
            sentence pair.
    """
    tokenizer = get_tokenizer()
    html = HtmlVisualizer("ca_contradiction_tokens.html")

    def is_cont(scores):
        # Class index 2 is reported as "Contradiction" in the output below.
        return np.argmax(scores) == 2

    def is_cont_strict(scores):
        # Stricter alternative to is_cont; currently unused.
        return scores[2] > 0.9

    def count_cont(result_list):
        # Number of entries whose class scores select contradiction.
        return sum(1 for _, _, scores, _ in result_list if is_cont(scores))

    def get_token_scored_cells(sent1, sent2, token_score):
        tokens1 = tokenizer.tokenize(sent1)
        tokens2 = tokenizer.tokenize(sent2)
        print(token_score)
        # Skips one leading position and one position between the two
        # sentences - presumably [CLS] and [SEP]; TODO confirm.
        start2 = 2 + len(tokens1)
        score_for_1 = token_score[1:1 + len(tokens1)]
        score_for_2 = token_score[start2:start2 + len(tokens2)]
        assert len(tokens1) == len(score_for_1)
        assert len(tokens2) == len(score_for_2)

        def get_cells(tokens, scores):
            # Scale so the largest score maps to 100, never scaling up
            # when every score is below 1.
            cap = max(max(scores), 1)
            factor = 100 / cap

            def normalize_score(s):
                return min(s * factor, 100)

            return [Cell(t, normalize_score(s)) for t, s in zip(tokens, scores)]

        cells1 = get_cells(tokens1, score_for_1)
        cells2 = get_cells(tokens2, score_for_2)
        return cells1, cells2

    def write_direction(title, first, second, class_scores, token_score):
        # Emit one direction of a sentence pair as two highlighted rows.
        cells1, cells2 = get_token_scored_cells(first, second, token_score)
        html.write_paragraph(
            "{}, P(Contradiction) = {}".format(title, class_scores[2]))
        html.write_table([cells1])
        html.write_table([cells2])

    def print_scored_sentences(scores):
        # Even index = forward direction, following odd index = backward.
        for i in range(0, len(scores), 2):
            sent1, sent2, score1, token_score1 = scores[i]
            _, _, score2, token_score2 = scores[i + 1]

            if is_cont(score1):
                write_direction("Forward", sent1, sent2, score1, token_score1)
            if is_cont(score2):
                write_direction("Backward", sent2, sent1, score2, token_score2)

    def print_analyzed_case(analyzed_case: AnalyzedCase):
        def print_part(score_list):
            cnt = count_cont(score_list)
            s = "{} of {}".format(cnt, len(score_list))
            html.write_paragraph(s)
            print_scored_sentences(score_list)

        html.write_paragraph("Gold")
        print_part(analyzed_case.score_g)
        html.write_paragraph("Pred")
        print_part(analyzed_case.score_p)

    for idx, dp in enumerate(analyzed_failture_cases):
        html.write_headline("Data point {}".format(idx))
        html.write_paragraph("------------")
        print_analyzed_case(dp)