def remake_pads(summaries, stdscr, index):
    new_pads = []
    for i in range(0, len(summaries)):
        subject = utils.format_html(summaries[i]['subject'])
        contents = utils.format_html(summaries[i]['content_snipet'])
        reverse = i == index
        new_pads.append(summary_pad(subject, contents, stdscr, reverse))
    return new_pads
def excel_gt_json_generate():
    """
    Generate the rr_gt.json ground-truth file used for calculating similarity.

    Returns:
        Writes rr_gt.json for metric.py to calculate similarity.
    """
    count = 0
    gt_gt = {}
    excel_path = '/home/gita/Downloads/rr_table/excel/'
    text_path = '/home/gita/Downloads/rr_table/text/'
    html_path = '/home/gita/Downloads/rr_table/html/'
    father_path_file = '/home/gita/Downloads/final/f.txt'
    gt_json_file = './src/rr_gt.json'
    with open(father_path_file, 'r') as jf:
        file_list = jf.readlines()
    for file in file_list:
        file_name = file.split('/')[-1].split('.')[0]
        excel_file = excel_path + file_name + '.xlsx'
        text_file = text_path + file_name + '.json'
        adjacent_matrix, cell_text_list = excel_to_matrix(excel_file, text_file)
        gt_json = matrix_to_html(cell_text_list, adjacent_matrix)
        gt_html = format_html(gt_json)
        with open(html_path + file_name + '.html', 'w', encoding='utf-8') as hf:
            hf.write(str(gt_html))
        gt_gt[file_name + '.json'] = {"html": gt_html}
        count += 1
        print(count, file_name)
        # if count >= 1:
        #     break
    with open(gt_json_file, 'w') as tf:
        tf.write(json.dumps(gt_gt))
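# For reference, rr_gt.json written above maps each ground-truth file name to a
# dict with a single "html" key holding the recovered table markup. A minimal,
# illustrative sketch of the layout (file name and markup are examples only, not
# taken from the actual data):
#
#     {
#         "000917.json": {"html": "<html><body><table>...</table></body></html>"}
#     }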
def excel_pred_json_generate():
    """
    Process the results of OCR and PointerNet to recover .html and generate .json.

    Returns:
        Writes rr_pred.json for metric.py to calculate similarity.
    """
    count = 0
    error_count = 0
    text_path = '/home/gita/Downloads/rr_table/text/'
    uf_path = '/home/gita/Downloads/final/f1/'
    df_path = '/home/gita/Downloads/final/f2/'
    lm_path = '/home/gita/Downloads/final/m1/'
    rm_path = '/home/gita/Downloads/final/m2/'
    img_path_txt = '/home/gita/Downloads/final/f.txt'
    pred_html_path = './recovered_html/rr_html/'
    pred_json_file = './src/rr_pred.json'
    father_dict = get_parent_dict(uf_path, df_path, lm_path, rm_path,
                                  img_path_txt, number_per_file=2)
    file_list = get_specific_files('/home/gita/Downloads/rr_table/text/', ['.json'])
    pred_json = {}
    for file_name in file_list:
        # check_list = ['000917.json']
        # if file_name not in check_list:
        #     continue
        print(count, file_name)
        text_file = text_path + file_name
        # try:
        preds = axis_to_rows(text_file)
        # except:
        #     error_count += 1
        #     print("-------------------------------------------------------------")
        preds_list, matrix = preds_to_matrix(preds, father_dict[file_name])
        if matrix is None:
            continue
        img_json = matrix_to_html(preds_list, matrix)
        pred_html = format_html(img_json)
        with open(pred_html_path + file_name + '.html', 'w', encoding='utf-8') as hf:
            hf.write(str(pred_html))
        pred_json[file_name] = pred_html
        count += 1
        # if count >= len(check_list):
        #     break
    print(error_count)
    with open(pred_json_file, 'w') as tf:
        tf.write(json.dumps(pred_json).strip())
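# For reference, unlike rr_gt.json, rr_pred.json written above maps each .json
# file name directly to its recovered HTML string, roughly:
#
#     {"000917.json": "<html><body><table>...</table></body></html>"}
#
# (illustrative values only).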
def ret_json_generate():
    """
    Process the results of OCR and PointerNet to recover .html and generate .json.

    Returns:
        Writes html.json for metric.py to calculate similarity.
    """
    ocred_path = '/home/gita/Downloads/mini_result/mini_json/'
    img_path_txt = '/home/gita/Downloads/mini_result_50/mini_father.txt'
    uf_path = '/home/gita/Downloads/mini_result_50/father/'
    df_path = ''
    lm_path = '/home/gita/Downloads/mini_result_50/mother_p/'
    rm_path = '/home/gita/Downloads/mini_result_50/mother_n/'
    ret_html_path = './recovered_html/mini_50/'
    ret_json_file = './src/mini_pred_50.json'
    father_dict = get_parent_dict(uf_path, df_path, lm_path, rm_path,
                                  img_path_txt, number_per_file=2)
    ocred_files = get_specific_files(ocred_path)
    count = 0
    pred_json = {}
    for file_name in ocred_files:
        if file_name in [
                'PMC3707453_006_00.png.json', 'PMC6022086_007_00.png.json'
        ]:
            continue
        preds = parse_ocred_json(ocred_path + file_name)
        preds = merge_by_iou(preds)
        count += 1
        print(count, file_name)
        preds_list, matrix = preds_to_matrix(preds, father_dict[file_name])
        if matrix is None:
            continue
        img_json = matrix_to_html(preds_list, matrix)
        pred_html = format_html(img_json)
        pred_json[file_name[:-5]] = pred_html
        with open(ret_html_path + file_name + '.html', 'w', encoding='utf-8') as hf:
            hf.write(str(pred_html))
        # break
    with open(ret_json_file, 'w') as tf:
        tf.write(json.dumps(pred_json).strip())
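# A minimal, assumed entry point for running the three generators above in order.
# The original module does not show one, so this is only a sketch; the hard-coded
# paths inside each function would need to exist before running it.
if __name__ == '__main__':
    excel_gt_json_generate()    # ground truth from the annotated Excel tables
    excel_pred_json_generate()  # predictions for the rr_table split
    ret_json_generate()         # predictions for the mini_result split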
def post_pad(post, network, stdscr):
    lines = []
    width = stdscr.getmaxyx()[1]
    bold = curses.A_BOLD
    norm = curses.A_NORMAL
    # lines.append((" ", norm))
    # Subject in bold, followed by the post body.
    subject = utils.format_html(post['subject'])
    for line in wrap(subject, width - 2):
        lines.append((line, bold))
    contents = utils.format_html(post['contents']).replace(
        '___bold_start___', '').replace('___bold_end___', '')
    for line in wrap(contents, width - 2):
        lines.append((line, norm))
    # Right-align the author name under the body.
    author = network.get_user_name(post['uid']).encode(
        'ascii', 'ignore') if post['uid'] else 'Anonymous'
    right_author = " " * (width - len(author) - 4) + '- ' + author
    lines.append((right_author, norm))
    if post['student_answer']:
        lines.append(("Student Answer:", bold))
        answer = post['student_answer']
        contents = utils.format_html(answer['contents']).replace(
            '___bold_start___', '').replace('___bold_end___', '')
        for line in wrap(contents, width - 2):
            lines.append((line, norm))
        author = network.get_user_name(answer['uid']).encode(
            'ascii', 'ignore') if answer['uid'] else 'Anonymous'
        right_author = " " * (width - len(author) - 4) + '- ' + author
        lines.append((right_author, norm))
    if post['instructor_answer']:
        lines.append(("Instructor Answer:", bold))
        answer = post['instructor_answer']
        contents = utils.format_html(answer['contents']).replace(
            '___bold_start___', '').replace('___bold_end___', '')
        for line in wrap(contents, width - 2):
            lines.append((line, norm))
        author = network.get_user_name(answer['uid']).encode(
            'ascii', 'ignore') if answer['uid'] else 'Anonymous'
        right_author = " " * (width - len(author) - 4) + '- ' + author
        lines.append((right_author, norm))
    if len(post['followups']) > 0:
        lines.append(("Followup Discussions:", bold))
        for followup in post['followups']:
            subject = utils.format_html(followup['subject'])
            hasUID = 'uid' in followup and followup['uid']
            author = network.get_user_name(
                followup['uid']) if hasUID else 'Anonymous'
            entry = subject + " (" + author.encode('ascii', 'ignore') + ")"
            if followup['plus_ones'] > 0:
                entry += ' +' + str(followup['plus_ones'])
            for line in wrap(entry, width - 2):
                lines.append((line, norm))
            # Replies to a followup are listed beneath it.
            for reply in followup['children']:
                subject = utils.format_html(reply['subject'])
                hasUID = 'uid' in reply and reply['uid']
                author = network.get_user_name(
                    reply['uid']) if hasUID else 'Anonymous'
                entry = subject + " (" + author.encode('ascii', 'ignore') + ")"
                first = True
                for line in wrap(entry, width - 9):
                    if first:
                        lines.append((" " + line, norm))
                        first = False
                    else:
                        lines.append((" " + line, norm))
    lines.append(("", norm))
    # One curses pad tall enough for every rendered line.
    pad = curses.newpad(len(lines) + 1, width)
    for item in lines:
        contents, style = item
        pad.addstr("\n " + contents, style)
    return pad
def view_summaries(stdscr, feed, network):
    curses.use_default_colors()
    pads = []
    summaries = []
    global data
    data = []
    unsaved_post = None
    height_sum = 0
    window_height = stdscr.getmaxyx()[0]
    window_width = stdscr.getmaxyx()[1]
    # Pull posts from the feed until roughly two screens' worth of summaries are loaded.
    while True:
        post = feed.next_post()
        if not post:
            break
        subject = utils.format_html(post['subject'])
        contents = utils.format_html(post['content_snipet'])
        subject_lines = textwrap.wrap(subject, window_width - 2)
        lines = textwrap.wrap(contents, window_width - 2)
        height = len(subject_lines) + len(lines) + 2
        if height_sum + height <= window_height * 2:
            summaries.append(post)
            data.append((subject, contents))
            height_sum += height
            pads.append(summary_pad(subject, contents, stdscr))
        else:
            unsaved_post = post
            break

    # Main loop
    i = 0
    stdscr.erase()
    stdscr.refresh()
    if len(pads) > 0:
        pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)
    stdscr.nodelay(False)  # getch blocks until a key is pressed
    render(pads, i, stdscr)
    while True:
        c = stdscr.getch()
        # Quit the program
        if c == ord('q') or c == curses.KEY_BACKSPACE:
            break
        # Open the selected post in a browser
        elif c == ord('i'):
            url = "https://piazza.com/class/" + network._nid + "?cid=" + summaries[i]['id']
            plat = platform.system()
            if plat == "Windows":
                subprocess.Popen(["explorer", url])
            elif plat == "Darwin":
                subprocess.Popen(["open", url])
            elif plat == "Linux":
                subprocess.Popen(["x-www-browser", url])
        # Check for ENTER: open the full post view
        elif c == ord('\n'):
            stdscr.clear()
            stdscr.refresh()
            post_summary_obj = summaries[i]
            id_num = post_summary_obj['id']
            post_obj = network.get_post(id_num)
            post_viewer.view_post(post_obj, network, stdscr)
            pads = remake_pads(summaries, stdscr, i)
        # Scroll down, fetching the next post from the feed if one is available
        elif c == ord('j') or c == curses.KEY_DOWN:
            post = unsaved_post if unsaved_post else feed.next_post()
            unsaved_post = None
            if post:
                subject = utils.format_html(post['subject'])
                contents = utils.format_html(post['content_snipet'])
                summaries.append(post)
                data.append((subject, contents))
                pads.append(summary_pad(subject, contents, stdscr))
            if i < len(pads) - 1:
                pads[i] = summary_pad(data[i][0], data[i][1], stdscr)
                i += 1
                pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)
        # Scroll up
        elif (c == ord('k') or c == curses.KEY_UP) and i > 0:
            pads[i] = summary_pad(data[i][0], data[i][1], stdscr)
            i -= 1
            pads[i] = summary_pad(data[i][0], data[i][1], stdscr, True)
        # Check for window resize
        if c == curses.KEY_RESIZE:
            stdscr.erase()
            stdscr.refresh()
            pads = remake_pads(summaries, stdscr, i)
        render(pads, i, stdscr)
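# A hedged sketch of how view_summaries might be launched: curses.wrapper sets up
# and tears down the terminal and passes stdscr plus the extra arguments through.
# The feed and network objects are assumed to come from the project's Piazza API
# wrapper; run_feed_viewer is a hypothetical helper, not part of the original code.
def run_feed_viewer(feed, network):
    curses.wrapper(view_summaries, feed, network)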