def generate_summary(article_sent_tokens, qid_ssi_to_importances, example_idx):
    qid = example_idx
    summary_sent_tokens = []
    summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
    already_used_source_indices = []
    similar_source_indices_list = []
    summary_sents_for_html = []
    ssi_length_extractive = None
    # Iteratively select a singleton/pair from the article that has the highest score from BERT
    while len(summary_tokens) < 300:
        if len(summary_tokens) >= l_param and ssi_length_extractive is None:
            ssi_length_extractive = len(similar_source_indices_list)
        mmr_dict = util.calc_MMR_source_indices(article_sent_tokens, summary_tokens, None,
                                                qid_ssi_to_importances, qid=qid)
        sents, source_indices = get_best_source_sents(article_sent_tokens, mmr_dict, already_used_source_indices)
        if len(source_indices) == 0:
            break
        summary_sent_tokens.extend(sents)
        summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
        similar_source_indices_list.append(source_indices)
        summary_sents_for_html.append(' <br> '.join([' '.join(sent) for sent in sents]))
        if filter_sentences:
            already_used_source_indices.extend(source_indices)
    if ssi_length_extractive is None:
        ssi_length_extractive = len(similar_source_indices_list)
    selected_article_sent_indices = util.flatten_list_of_lists(similar_source_indices_list[:ssi_length_extractive])
    summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
    return summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive
def generate_summary(article_sent_tokens, qid_ssi_to_importances, example_idx):
    qid = example_idx
    summary_sent_tokens = []
    summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
    already_used_source_indices = []
    similar_source_indices_list = []
    summary_sents_for_html = []
    ssi_length_extractive = None
    while len(summary_tokens) < 1000:
        if len(summary_tokens) >= l_param and ssi_length_extractive is None:
            ssi_length_extractive = len(similar_source_indices_list)
        if FLAGS.dataset_name == 'xsum' and len(summary_tokens) > 0:
            ssi_length_extractive = len(similar_source_indices_list)
            break
        mmr_dict = util.calc_MMR_source_indices(article_sent_tokens, summary_tokens, None,
                                                qid_ssi_to_importances, qid=qid)
        sents, source_indices = get_best_source_sents(article_sent_tokens, mmr_dict, already_used_source_indices)
        if len(source_indices) == 0:
            break
        summary_sent_tokens.extend(sents)
        summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
        similar_source_indices_list.append(source_indices)
        summary_sents_for_html.append(' <br> '.join([' '.join(sent) for sent in sents]))
        if filter_sentences:
            already_used_source_indices.extend(source_indices)
    if ssi_length_extractive is None:
        ssi_length_extractive = len(similar_source_indices_list)
    selected_article_sent_indices = util.flatten_list_of_lists(similar_source_indices_list[:ssi_length_extractive])
    summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
    # summary = '\n'.join([' '.join(tokens) for tokens in summary_sent_tokens])
    return summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive
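# Both generate_summary variants above lean on two util helpers whose definitions
# are not shown in this file. A minimal sketch consistent with how they are called
# here is below; treat these as assumptions about util's behavior, not its actual
# source. Note that other snippets in this collection use different functions that
# happen to share the name reorder (sorting rows, ordering contour corners,
# reordering words for translation).

def reorder(lst, order):
    """Gather the items of lst at the given indices, in that order (assumed behavior)."""
    return [lst[i] for i in order]


def flatten_list_of_lists(list_of_lists):
    """Concatenate the sublists into one flat list (assumed behavior)."""
    return [item for sublist in list_of_lists for item in sublist]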
def main():
    config.initializeConfig()
    if len(sys.argv) < 3:
        print 'USAGE:', sys.argv[0], USAGE
        # sys.exit(0)
        myVrp, solutions, myStyleSheet = None, None, None
    else:
        # type of input data we're working on
        toks = sys.argv[1].split(':')
        type = toks[0]
        subtype = toks[1] if len(toks) > 1 else 'default'
        # solutions to load and their subtype
        if len(sys.argv) > 3 and sys.argv[3][0] == ':':
            solutionSubtype = sys.argv[3][1:]
            solutionFileNames = sys.argv[4:]
        elif len(sys.argv) > 2:
            solutionSubtype = 'default'
            solutionFileNames = sys.argv[3:]
        else:
            solutionFileNames = False
        # loader object to load all of this
        loader = loaddata.DataLoader()
        # here we load the data
        myVrp = loader.loadInstance(sys.argv[2], type, subtype)
        # reorder solutions using numbers in file names
        if solutionFileNames:
            solutionFileNames = util.reorder(solutionFileNames)
            solutions = [loader.loadSolution(fName, myVrp, type, solutionSubtype)
                         for fName in solutionFileNames]
        else:
            solutions = None
        myStyleSheet = loader.loadStyleSheet(type)
        # myStyleSheet = loaddata.stylesheetFromType(type)
        # myStyleSheet = stylesheet.FunkyStyleSheet()
    app = wxgui.vrpgui.VrpGui(myVrp, solutions, myStyleSheet)
    app.MainLoop()
def get_similar_source_sents_recursive(summ_sent, partial_summ_sent, selection, article_sent_tokens,
                                       vocab, similarities, depth, sentence_limit, min_matched_tokens):
    if sentence_limit == 1:
        if depth > 2:
            return [[]], [[]], [[]]
    elif len(selection) < 3 or depth >= sentence_limit:
        # base case: when summary sentence is too short
        return [[]], [[]], [[]]
    all_sent_indices = []
    all_lcs_paths = []
    all_smooth_article_paths = []
    # partial_summ_sent = util.reorder(summ_sent, selection)
    top_sent_indices, top_similarity = get_top_similar_sent(partial_summ_sent, article_sent_tokens, vocab)
    top_similarities = util.reorder(similarities, top_sent_indices)
    top_sent_indices = [x for _, x in sorted(zip(top_similarities, top_sent_indices),
                                             key=lambda pair: pair[0])][::-1]
    for top_sent_idx in top_sent_indices:
        nonstopword_matches, _ = util.matching_unigrams(partial_summ_sent, article_sent_tokens[top_sent_idx],
                                                        should_remove_stop_words=True)
        lcs_len, (summ_lcs_path, _) = util.matching_unigrams(partial_summ_sent, article_sent_tokens[top_sent_idx])
        smooth_article_path = get_smooth_path(summ_sent, article_sent_tokens[top_sent_idx])
        if len(nonstopword_matches) < min_matched_tokens:
            continue
        leftover_selection = [idx for idx in range(len(partial_summ_sent)) if idx not in summ_lcs_path]
        partial_summ_sent = replace_with_blanks(partial_summ_sent, leftover_selection)
        # recursive call
        sent_indices, lcs_paths, smooth_article_paths = get_similar_source_sents_recursive(
            summ_sent, partial_summ_sent, leftover_selection, article_sent_tokens, vocab,
            similarities, depth + 1, sentence_limit, min_matched_tokens)
        # append my result to the recursive collection
        combined_sent_indices = [[top_sent_idx] + indices for indices in sent_indices]
        combined_lcs_paths = [[summ_lcs_path] + paths for paths in lcs_paths]
        combined_smooth_article_paths = [[smooth_article_path] + paths for paths in smooth_article_paths]
        all_sent_indices.extend(combined_sent_indices)
        all_lcs_paths.extend(combined_lcs_paths)
        all_smooth_article_paths.extend(combined_smooth_article_paths)
    if len(all_sent_indices) == 0:
        return [[]], [[]], [[]]
    return all_sent_indices, all_lcs_paths, all_smooth_article_paths
def evaluate_example(ex):
    example, example_idx, qid_ssi_to_importances, _, _ = ex
    print(example_idx)
    # Read example from dataset
    raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, doc_indices = util.unpack_tf_example(example, names_to_types)
    article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
    enforced_groundtruth_ssi_list = util.enforce_sentence_limit(groundtruth_similar_source_indices_list, sentence_limit)
    groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
    groundtruth_summ_sent_tokens = [sent.split(' ') for sent in groundtruth_summ_sents[0]]
    if FLAGS.upper_bound:
        # If upper bound, then get the groundtruth singletons/pairs
        replaced_ssi_list = util.replace_empty_ssis(enforced_groundtruth_ssi_list, raw_article_sents)
        selected_article_sent_indices = util.flatten_list_of_lists(replaced_ssi_list)
        summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
        similar_source_indices_list = groundtruth_similar_source_indices_list
        ssi_length_extractive = len(similar_source_indices_list)
    else:
        # Generates summary based on BERT output. This is an extractive summary.
        summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive = generate_summary(
            article_sent_tokens, qid_ssi_to_importances, example_idx)
        similar_source_indices_list_trunc = similar_source_indices_list[:ssi_length_extractive]
        summary_sents_for_html_trunc = summary_sents_for_html[:ssi_length_extractive]
        if example_idx <= 1:
            summary_sent_tokens = [sent.split(' ') for sent in summary_sents_for_html_trunc]
            extracted_sents_in_article_html = html_highlight_sents_in_article(
                summary_sent_tokens, similar_source_indices_list_trunc, article_sent_tokens, doc_indices=doc_indices)
            groundtruth_ssi_list, lcs_paths_list, article_lcs_paths_list = get_simple_source_indices_list(
                groundtruth_summ_sent_tokens, article_sent_tokens, None, sentence_limit, min_matched_tokens)
            groundtruth_highlighted_html = html_highlight_sents_in_article(
                groundtruth_summ_sent_tokens, groundtruth_ssi_list, article_sent_tokens,
                lcs_paths_list=lcs_paths_list, article_lcs_paths_list=article_lcs_paths_list,
                doc_indices=doc_indices)
            all_html = '<u>System Summary</u><br><br>' + extracted_sents_in_article_html + '<u>Groundtruth Summary</u><br><br>' + groundtruth_highlighted_html
            ssi_functions.write_highlighted_html(all_html, html_dir, example_idx)
    rouge_functions.write_for_rouge(groundtruth_summ_sents, summary_sents, example_idx, ref_dir, dec_dir)
    return (groundtruth_similar_source_indices_list, similar_source_indices_list, ssi_length_extractive)
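# evaluate_example calls util.enforce_sentence_limit on a list of source-index
# tuples, where each tuple is the singleton or pair of article sentences that a
# summary sentence was fused from. Judging only from the call sites, a plausible
# sketch is the following; the real implementation may differ.

def enforce_sentence_limit(ssi_list, sentence_limit):
    """Truncate each source-indices tuple to at most sentence_limit indices (assumed)."""
    return [ssi[:sentence_limit] for ssi in ssi_list]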
def get_merge_example(similar_source_indices, article_sent_tokens, summ_sent, corefs, article_lcs_paths):
    # restricted_source_indices = []
    # for source_indices_idx, source_indices in enumerate(similar_source_indices):
    #     if source_indices_idx >= FLAGS.sentence_limit:
    #         break
    #     restricted_source_indices.append(source_indices[0])
    if FLAGS.chronological and len(similar_source_indices) > 1:
        if similar_source_indices[0] > similar_source_indices[1]:
            similar_source_indices = (min(similar_source_indices), max(similar_source_indices))
            article_lcs_paths = (article_lcs_paths[1], article_lcs_paths[0])
    merged_example_sentences = [' '.join(sent) for sent in util.reorder(article_sent_tokens, similar_source_indices)]
    merged_example_article_text = ' '.join(merged_example_sentences)
    merged_example_abstracts = [[' '.join(summ_sent)]]
    merge_example = convert_data.make_example(merged_example_article_text, merged_example_abstracts,
                                              None, merged_example_sentences, corefs, article_lcs_paths)
    return merge_example
def main(unused_argv):
    print('Running statistics on %s' % FLAGS.dataset_name)
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)
    out_dir = os.path.join(os.path.expanduser('~') + '/data/kaiqiang_data', FLAGS.dataset_name)
    if FLAGS.mode == 'write':
        util.create_dirs(out_dir)
        if FLAGS.dataset_name == 'duc_2004':
            dataset_splits = ['test']
        elif FLAGS.dataset_split == 'all':
            dataset_splits = ['test', 'val', 'train']
        else:
            dataset_splits = [FLAGS.dataset_split]
        for dataset_split in dataset_splits:
            if dataset_split == 'test':
                ssi_data_path = os.path.join('logs/%s_bert_both_sentemb_artemb_plushidden' % FLAGS.dataset_name, 'ssi.pkl')
                print(util.bcolors.OKGREEN + "Loading SSI from BERT at %s" % ssi_data_path + util.bcolors.ENDC)
                with open(ssi_data_path) as f:
                    ssi_triple_list = pickle.load(f)
            source_dir = os.path.join(data_dir, FLAGS.dataset_name)
            source_files = sorted(glob.glob(source_dir + '/' + dataset_split + '*'))
            total = len(source_files) * 1000 if ('cnn' in FLAGS.dataset_name or 'newsroom' in FLAGS.dataset_name
                                                 or 'xsum' in FLAGS.dataset_name) else len(source_files)
            example_generator = data.example_generator(source_dir + '/' + dataset_split + '*',
                                                       True, False, should_check_valid=False)
            out_document_path = os.path.join(out_dir, dataset_split + '.Ndocument')
            out_summary_path = os.path.join(out_dir, dataset_split + '.Nsummary')
            out_example_idx_path = os.path.join(out_dir, dataset_split + '.Nexampleidx')
            doc_writer = open(out_document_path, 'w')
            if dataset_split != 'test':
                sum_writer = open(out_summary_path, 'w')
            ex_idx_writer = open(out_example_idx_path, 'w')
            for example_idx, example in enumerate(tqdm(example_generator, total=total)):
                if FLAGS.num_instances != -1 and example_idx >= FLAGS.num_instances:
                    break
                raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, doc_indices = util.unpack_tf_example(example, names_to_types)
                article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
                if FLAGS.dataset_name == 'duc_2004':
                    groundtruth_summ_sents = [[sent.strip() for sent in gt_summ_text.strip().split('\n')]
                                              for gt_summ_text in groundtruth_summary_text]
                else:
                    groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
                if doc_indices is None:
                    doc_indices = [0] * len(util.flatten_list_of_lists(article_sent_tokens))
                doc_indices = [int(doc_idx) for doc_idx in doc_indices]
                # rel_sent_indices, _, _ = preprocess_for_lambdamart_no_flags.get_rel_sent_indices(doc_indices, article_sent_tokens)
                if dataset_split == 'test':
                    if example_idx >= len(ssi_triple_list):
                        raise Exception('Len of ssi list (%d) is less than number of examples (>=%d)'
                                        % (len(ssi_triple_list), example_idx))
                    ssi_length_extractive = ssi_triple_list[example_idx][2]
                    if ssi_length_extractive > 1:
                        a = 0
                    ssi = ssi_triple_list[example_idx][1]
                    ssi = ssi[:ssi_length_extractive]
                    groundtruth_similar_source_indices_list = ssi
                else:
                    groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                        groundtruth_similar_source_indices_list, FLAGS.sentence_limit)
                for ssi_idx, ssi in enumerate(groundtruth_similar_source_indices_list):
                    if len(ssi) == 0:
                        continue
                    my_article = ' '.join(util.reorder(raw_article_sents, ssi))
                    doc_writer.write(my_article + '\n')
                    if dataset_split != 'test':
                        sum_writer.write(groundtruth_summ_sents[0][ssi_idx] + '\n')
                    ex_idx_writer.write(str(example_idx) + '\n')
    elif FLAGS.mode == 'evaluate':
        summary_dir = '/home/logan/data/kaiqiang_data/logan_ACL/trained_on_' + FLAGS.train_dataset + '/' + FLAGS.dataset_name
        out_summary_path = os.path.join(summary_dir, 'test' + 'Summary.txt')
        out_example_idx_path = os.path.join(out_dir, 'test' + '.Nexampleidx')
        decode_dir = 'logs/kaiqiang_%s_trainedon%s' % (FLAGS.dataset_name, FLAGS.train_dataset)
        rouge_ref_dir = os.path.join(decode_dir, 'reference')
        rouge_dec_dir = os.path.join(decode_dir, 'decoded')
        util.create_dirs(rouge_ref_dir)
        util.create_dirs(rouge_dec_dir)

        def num_lines_in_file(file_path):
            with open(file_path) as f:
                num_lines = sum(1 for line in f)
            return num_lines

        def process_example(sents, ex_idx, groundtruth_summ_sents):
            final_decoded_words = []
            for sent in sents:
                final_decoded_words.extend(sent.split(' '))
            rouge_functions.write_for_rouge(groundtruth_summ_sents, None, ex_idx, rouge_ref_dir,
                                            rouge_dec_dir, decoded_words=final_decoded_words, log=False)

        num_lines_summary = num_lines_in_file(out_summary_path)
        num_lines_example_indices = num_lines_in_file(out_example_idx_path)
        if num_lines_summary != num_lines_example_indices:
            raise Exception('Num lines summary != num lines example indices: (%d, %d)'
                            % (num_lines_summary, num_lines_example_indices))
        source_dir = os.path.join(data_dir, FLAGS.dataset_name)
        example_generator = data.example_generator(source_dir + '/' + 'test' + '*',
                                                   True, False, should_check_valid=False)
        sum_writer = open(out_summary_path)
        ex_idx_writer = open(out_example_idx_path)
        prev_ex_idx = 0
        sents = []
        for line_idx in tqdm(range(num_lines_summary)):
            line = sum_writer.readline()
            ex_idx = int(ex_idx_writer.readline())
            if ex_idx == prev_ex_idx:
                sents.append(line)
            else:
                example = example_generator.next()
                raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, doc_indices = util.unpack_tf_example(example, names_to_types)
                if FLAGS.dataset_name == 'duc_2004':
                    groundtruth_summ_sents = [[sent.strip() for sent in gt_summ_text.strip().split('\n')]
                                              for gt_summ_text in groundtruth_summary_text]
                else:
                    groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
                process_example(sents, ex_idx, groundtruth_summ_sents)
                prev_ex_idx = ex_idx
                sents = [line]
        # flush the final example
        example = example_generator.next()
        raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, doc_indices = util.unpack_tf_example(example, names_to_types)
        if FLAGS.dataset_name == 'duc_2004':
            groundtruth_summ_sents = [[sent.strip() for sent in gt_summ_text.strip().split('\n')]
                                      for gt_summ_text in groundtruth_summary_text]
        else:
            groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
        process_example(sents, ex_idx, groundtruth_summ_sents)
        print("Now starting ROUGE eval...")
        if FLAGS.dataset_name == 'xsum':
            l_param = 100
        else:
            l_param = 100
        results_dict = rouge_functions.rouge_eval(rouge_ref_dir, rouge_dec_dir, l_param=l_param)
        rouge_functions.rouge_log(results_dict, decode_dir)
    else:
        raise Exception('mode flag was not evaluate or write.')
def result(img):
    width = 700
    height = 700
    img = cv2.resize(img, (width, height))
    img_cont = img.copy()
    img_cp = img.copy()
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)
    img_edge = cv2.Canny(img_blur, 1, 30)
    contours, hierarchy = cv2.findContours(img_edge, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cv2.drawContours(img_cont, contours, -1, (0, 255, 0), 5)
    r = util.rectContour(contours)
    rlist = []
    # drawing rectangular contours on img_cp
    for i in range(len(r)):
        rc = util.getCornerPoints(r[i])
        cv2.drawContours(img_cp, r[i], -1, (255, 0, 0), 5)
        rc = util.reorder(rc)
        rlist.append(rc)

    # questions 11-14
    pts_1 = np.float32(rlist[0])
    pts_2 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(pts_1, pts_2)  # get transformation matrix
    imgWarpColored_details = cv2.warpPerspective(img, matrix, (width, 1000))
    r0 = cv2.cvtColor(imgWarpColored_details, cv2.COLOR_BGR2GRAY)
    r01 = r0[124:700, 29:121]
    r01_thresh = cv2.threshold(r01, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r01 = util.splitBoxes(r01_thresh, 4, 4)
    ans11_14 = util.getArray(4, 4, box_r01)

    # questions 15-18
    r02 = r0[124:700, 171:263]
    r02_thresh = cv2.threshold(r02, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r02 = util.splitBoxes(r02_thresh, 4, 4)
    ans15_18 = util.getArray(4, 4, box_r02)

    # questions 19-22
    r03 = r0[124:700, 313:405]
    r03_thresh = cv2.threshold(r03, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r03 = util.splitBoxes(r03_thresh, 4, 4)
    ans19_22 = util.getArray(4, 4, box_r03)

    # questions 23-26
    r04 = r0[124:700, 455:547]
    r04_thresh = cv2.threshold(r04, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r04 = util.splitBoxes(r04_thresh, 4, 4)
    ans23_26 = util.getArray(4, 4, box_r04)

    # questions 27-30
    r05 = r0[124:700, 597:689]
    r05_thresh = cv2.threshold(r05, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r05 = util.splitBoxes(r05_thresh, 4, 4)
    ans27_30 = util.getArray(4, 4, box_r05)

    # information block (enrollment no. and test id)
    pts_11 = np.float32(rlist[1])
    pts_21 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(pts_11, pts_21)  # get transformation matrix
    imgWarpColored_details1 = cv2.warpPerspective(img, matrix, (width, 1000))
    r1 = cv2.cvtColor(imgWarpColored_details1, cv2.COLOR_BGR2GRAY)

    # enrollment_no
    r11 = r1[140:680, 50:400]
    r11_thresh = cv2.threshold(r11, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r11 = util.splitBoxes(r11_thresh, 10, 10)
    enrollment_no = np.array(util.getArray(10, 10, box_r11))
    en = enrollment_no.T
    enl = []
    for i in range(10):
        enl.append(str((np.argmax(en[i]) + 1) % 10))
    eno = "".join(enl)

    # test_id
    r12 = r1[140:680, 500:690]
    r12_thresh = cv2.threshold(r12, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r12 = util.splitBoxes(r12_thresh, 10, 5)
    test_id = np.array(util.getArray(10, 5, box_r12))
    tid = test_id.T
    til = []
    for i in range(5):
        til.append(str((np.argmax(tid[i]) + 1) % 10))
    tids = "".join(til)

    # questions 1-10
    pts_13 = np.float32(rlist[2])
    pts_23 = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
    matrix = cv2.getPerspectiveTransform(pts_13, pts_23)  # get transformation matrix
    imgWarpColored_details2 = cv2.warpPerspective(img, matrix, (width, 1000))
    r2 = cv2.cvtColor(imgWarpColored_details2, cv2.COLOR_BGR2GRAY)

    # questions 1-5
    r21 = r2[190:690, 120:320]
    r21_thresh = cv2.threshold(r21, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r21 = util.splitBoxes(r21_thresh, 5, 4)
    ans1_5 = util.getArray(5, 4, box_r21)
    # print(ans1_5)

    # questions 6-10
    r22 = r2[190:690, 420:620]
    r22_thresh = cv2.threshold(r22, 100, 255, cv2.THRESH_BINARY_INV)[1]
    box_r22 = util.splitBoxes(r22_thresh, 5, 4)
    ans6_10 = util.getArray(5, 4, box_r22)
    # tmp = np.array(ans6_10)
    # for i in range(5):
    #     print(np.argmax(tmp[i]))
    # print(ans6_10)

    a1 = np.array(ans1_5)
    a2 = np.array(ans6_10)
    a3 = np.array(ans11_14)
    a4 = np.array(ans15_18)
    a5 = np.array(ans19_22)
    a6 = np.array(ans23_26)
    a7 = np.array(ans27_30)
    options = ['a', 'b', 'c', 'd']
    answers = []
    # pick the most strongly marked bubble per question; 'x' if nothing is marked
    for arr, num_questions in [(a1, 5), (a2, 5), (a3, 4), (a4, 4), (a5, 4), (a6, 4), (a7, 4)]:
        for i in range(num_questions):
            if np.max(arr[i]) != 0:
                answers.append(options[np.argmax(arr[i])])
            else:
                answers.append('x')
    return eno, tids, answers
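# The grader above depends on util.splitBoxes and util.getArray, which are not
# defined in this file. A minimal sketch matching the call sites (split a
# thresholded grid image into rows x cols cells, then count marked pixels per
# cell) could look like this; it assumes the crop's height and width are evenly
# divisible by rows and cols, as they are for the slices used above.

import cv2
import numpy as np


def splitBoxes(img, rows, cols):
    """Split a grid image into rows * cols equal cells, row-major (assumed behavior)."""
    boxes = []
    for row in np.vsplit(img, rows):
        boxes.extend(np.hsplit(row, cols))
    return boxes


def getArray(rows, cols, boxes):
    """Sum the nonzero (marked) pixels of each cell into a rows x cols nested list (assumed)."""
    return [[int(cv2.countNonZero(boxes[r * cols + c])) for c in range(cols)]
            for r in range(rows)]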
def decode_iteratively(self, example_generator, total, names_to_types, ssi_list, hps):
    for example_idx, example in enumerate(tqdm(example_generator, total=total)):
        raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text = util.unpack_tf_example(example, names_to_types)
        article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
        groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]

        if ssi_list is None:
            # this is if we are doing the upper bound evaluation (ssi_list comes straight from the groundtruth)
            sys_ssi = groundtruth_similar_source_indices_list
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
            sys_ssi = util.replace_empty_ssis(sys_ssi, raw_article_sents)
        else:
            gt_ssi, sys_ssi, ext_len = ssi_list[example_idx]
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 1)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 2)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 2)
            if gt_ssi != groundtruth_similar_source_indices_list:
                print('Warning: Example %d has different groundtruth source indices: %s || %s'
                      % (example_idx, groundtruth_similar_source_indices_list, gt_ssi))

        if FLAGS.dataset_name == 'xsum':
            sys_ssi = [sys_ssi[0]]

        final_decoded_words = []
        final_decoded_outputs = ''
        best_hyps = []
        highlight_html_total = ''
        for ssi_idx, ssi in enumerate(sys_ssi):
            selected_raw_article_sents = util.reorder(raw_article_sents, ssi)
            selected_article_text = ' '.join([' '.join(sent) for sent in util.reorder(article_sent_tokens, ssi)])
            selected_doc_indices_str = '0 ' * len(selected_article_text.split())
            if FLAGS.upper_bound:
                selected_groundtruth_summ_sent = [[groundtruth_summ_sents[0][ssi_idx]]]
            else:
                selected_groundtruth_summ_sent = groundtruth_summ_sents
            batch = create_batch(selected_article_text, selected_groundtruth_summ_sent, selected_doc_indices_str,
                                 selected_raw_article_sents, FLAGS.batch_size, hps, self._vocab)
            decoded_words, decoded_output, best_hyp = decode_example(self._sess, self._model, self._vocab,
                                                                     batch, example_idx, hps)
            best_hyps.append(best_hyp)
            final_decoded_words.extend(decoded_words)
            final_decoded_outputs += decoded_output

            if example_idx < 1000:
                min_matched_tokens = 2
                selected_article_sent_tokens = [util.process_sent(sent) for sent in selected_raw_article_sents]
                highlight_summary_sent_tokens = [decoded_words]
                highlight_ssi_list, lcs_paths_list, highlight_smooth_article_lcs_paths_list = ssi_functions.get_simple_source_indices_list(
                    highlight_summary_sent_tokens, selected_article_sent_tokens, None, 2, min_matched_tokens)
                highlighted_html = ssi_functions.html_highlight_sents_in_article(
                    highlight_summary_sent_tokens, highlight_ssi_list, selected_article_sent_tokens,
                    lcs_paths_list=lcs_paths_list,
                    article_lcs_paths_list=highlight_smooth_article_lcs_paths_list)
                highlight_html_total += '<u>System Summary</u><br><br>' + highlighted_html + '<br><br>'

            if len(final_decoded_words) >= 100:
                break

        if example_idx < 1000:
            self.write_for_human(raw_article_sents, groundtruth_summ_sents, final_decoded_words, example_idx)
            ssi_functions.write_highlighted_html(highlight_html_total, self._highlight_dir, example_idx)

        # write ref summary and decoded summary to file, to eval with pyrouge later
        rouge_functions.write_for_rouge(groundtruth_summ_sents, None, example_idx, self._rouge_ref_dir,
                                        self._rouge_dec_dir, decoded_words=final_decoded_words, log=False)
        example_idx += 1  # this is how many examples we've decoded

    logging.info("Decoder has finished reading dataset for single_pass.")
    logging.info("Output has been saved in %s and %s.", self._rouge_ref_dir, self._rouge_dec_dir)
    if len(os.listdir(self._rouge_ref_dir)) != 0:
        l_param = 100
        logging.info("Now starting ROUGE eval...")
        results_dict = rouge_functions.rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir, l_param=l_param)
        rouge_functions.rouge_log(results_dict, self._decode_dir)
while True:
    if webCam:
        success, img = cap.read()
    else:
        img = cv.imread(path)
    img = cv.resize(img, (0, 0), None, 0.5, 0.5)
    img, finalContours = util.genricGetContours(img, minArea=50000, filterr=4)
    if len(finalContours) != 0:
        biggest = finalContours[0][2]
        imgWarp = util.wrapImage(img, biggest, wP, hP, 40)
        img2, Contours2 = util.genricGetContours(imgWarp, minArea=1000, filterr=4, cThr=[50, 50])
        if len(Contours2) != 0:
            for obj in Contours2:
                # cv.polylines(img2, [obj[2]], True, (0, 255, 255), 2)
                nPoints = util.reorder(obj[2])
                newWidth = round((util.findDistance(nPoints[0][0] // scale, nPoints[1][0] // scale) / 10), 1)
                newHeight = round((util.findDistance(nPoints[0][0] // scale, nPoints[2][0] // scale) / 10), 1)
                cv.arrowedLine(img2, (nPoints[0][0][0], nPoints[0][0][1]),
                               (nPoints[1][0][0], nPoints[1][0][1]), (255, 0, 255), 3, 8, 0, 0.05)
                cv.arrowedLine(img2, (nPoints[0][0][0], nPoints[0][0][1]),
                               (nPoints[2][0][0], nPoints[2][0][1]), (255, 0, 255), 3, 8, 0, 0.05)
                x, y, w, h = obj[3]
                cv.putText(img2, '{}cm'.format(newWidth), (x + 30, y - 10),
                           cv.FONT_HERSHEY_COMPLEX, 2, (255, 0, 255), 2)
                cv.putText(img2, '{}cm'.format(newHeight), (x - 70, y + h // 2),
                           cv.FONT_HERSHEY_COMPLEX, 2, (255, 0, 255), 2)
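# In the measurement loop above, util.reorder takes a 4-point contour (shape
# (4, 1, 2)) and returns the corners in a canonical order, since nPoints[0],
# nPoints[1], and nPoints[2] are then treated as top-left, top-right, and
# bottom-left. The implementation is not shown here; the usual document-scanner
# trick (smallest coordinate sum is top-left, largest is bottom-right, and the
# y - x difference separates top-right from bottom-left) is a reasonable guess:

import numpy as np


def reorder(points):
    """Order 4 corner points as [top-left, top-right, bottom-left, bottom-right] (assumed)."""
    points = points.reshape((4, 2))
    ordered = np.zeros((4, 1, 2), dtype=np.int32)
    s = points.sum(axis=1)       # x + y per corner
    d = np.diff(points, axis=1)  # y - x per corner
    ordered[0] = points[np.argmin(s)]  # top-left: smallest x + y
    ordered[3] = points[np.argmax(s)]  # bottom-right: largest x + y
    ordered[1] = points[np.argmin(d)]  # top-right: smallest y - x
    ordered[2] = points[np.argmax(d)]  # bottom-left: largest y - x
    return ordered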
def evaluate_example(ex):
    example, example_idx, qid_ssi_to_importances, qid_ssi_to_token_scores_and_mappings = ex
    print(example_idx)
    # example_idx += 1
    qid = example_idx
    raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, corefs, doc_indices = util.unpack_tf_example(example, names_to_types)
    article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
    enforced_groundtruth_ssi_list = util.enforce_sentence_limit(groundtruth_similar_source_indices_list, sentence_limit)
    groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
    groundtruth_summ_sent_tokens = [sent.split(' ') for sent in groundtruth_summ_sents[0]]
    if FLAGS.upper_bound:
        replaced_ssi_list = util.replace_empty_ssis(enforced_groundtruth_ssi_list, raw_article_sents)
        selected_article_sent_indices = util.flatten_list_of_lists(replaced_ssi_list)
        summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
        similar_source_indices_list = groundtruth_similar_source_indices_list
        ssi_length_extractive = len(similar_source_indices_list)
    else:
        summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive, \
            article_lcs_paths_list, token_probs_list = generate_summary(
                article_sent_tokens, qid_ssi_to_importances, example_idx, qid_ssi_to_token_scores_and_mappings)
        similar_source_indices_list_trunc = similar_source_indices_list[:ssi_length_extractive]
        summary_sents_for_html_trunc = summary_sents_for_html[:ssi_length_extractive]
        if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
            summary_sent_tokens = [sent.split(' ') for sent in summary_sents_for_html_trunc]
            if FLAGS.tag_tokens and FLAGS.tag_loss_wt != 0:
                lcs_paths_list_param = copy.deepcopy(article_lcs_paths_list)
            else:
                lcs_paths_list_param = None
            extracted_sents_in_article_html = html_highlight_sents_in_article(
                summary_sent_tokens, similar_source_indices_list_trunc, article_sent_tokens,
                doc_indices=doc_indices, lcs_paths_list=lcs_paths_list_param)
            # write_highlighted_html(extracted_sents_in_article_html, html_dir, example_idx)
            groundtruth_ssi_list, gt_lcs_paths_list, gt_article_lcs_paths_list, gt_smooth_article_paths_list = get_simple_source_indices_list(
                groundtruth_summ_sent_tokens, article_sent_tokens, None, sentence_limit, min_matched_tokens)
            groundtruth_highlighted_html = html_highlight_sents_in_article(
                groundtruth_summ_sent_tokens, groundtruth_ssi_list, article_sent_tokens,
                lcs_paths_list=gt_lcs_paths_list, article_lcs_paths_list=gt_smooth_article_paths_list,
                doc_indices=doc_indices)
            all_html = '<u>System Summary</u><br><br>' + extracted_sents_in_article_html + '<u>Groundtruth Summary</u><br><br>' + groundtruth_highlighted_html
            # all_html = '<u>System Summary</u><br><br>' + extracted_sents_in_article_html
            write_highlighted_html(all_html, html_dir, example_idx)
    rouge_functions.write_for_rouge(groundtruth_summ_sents, summary_sents, example_idx, ref_dir, dec_dir)
    return (groundtruth_similar_source_indices_list, similar_source_indices_list,
            ssi_length_extractive, token_probs_list)
def evaluate_example(ex):
    example, example_idx, qid_ssi_to_importances, _, _ = ex
    print(example_idx)
    # example_idx += 1
    qid = example_idx
    raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, corefs, doc_indices = util.unpack_tf_example(example, names_to_types)
    article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
    enforced_groundtruth_ssi_list = util.enforce_sentence_limit(groundtruth_similar_source_indices_list, sentence_limit)
    if FLAGS.dataset_name == 'duc_2004':
        groundtruth_summ_sents = [[sent.strip() for sent in gt_summ_text.strip().split('\n')]
                                  for gt_summ_text in groundtruth_summary_text]
    else:
        groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
    groundtruth_summ_sent_tokens = [sent.split(' ') for sent in groundtruth_summ_sents[0]]
    if FLAGS.upper_bound:
        replaced_ssi_list = util.replace_empty_ssis(enforced_groundtruth_ssi_list, raw_article_sents)
        selected_article_sent_indices = util.flatten_list_of_lists(replaced_ssi_list)
        summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
        similar_source_indices_list = groundtruth_similar_source_indices_list
        ssi_length_extractive = len(similar_source_indices_list)
    elif FLAGS.lead:
        lead_ssi_list = [(idx,) for idx in list(range(util.average_sents_for_dataset[FLAGS.dataset_name]))]
        # make sure the sentence indices don't go past the total number of sentences in the article
        lead_ssi_list = lead_ssi_list[:len(raw_article_sents)]
        selected_article_sent_indices = util.flatten_list_of_lists(lead_ssi_list)
        summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
        similar_source_indices_list = lead_ssi_list
        ssi_length_extractive = len(similar_source_indices_list)
    else:
        summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive = generate_summary(
            article_sent_tokens, qid_ssi_to_importances, example_idx)
        similar_source_indices_list_trunc = similar_source_indices_list[:ssi_length_extractive]
        summary_sents_for_html_trunc = summary_sents_for_html[:ssi_length_extractive]
        if example_idx <= 100:
            summary_sent_tokens = [sent.split(' ') for sent in summary_sents_for_html_trunc]
            extracted_sents_in_article_html = html_highlight_sents_in_article(
                summary_sent_tokens, similar_source_indices_list_trunc, article_sent_tokens,
                doc_indices=doc_indices)
            # write_highlighted_html(extracted_sents_in_article_html, html_dir, example_idx)
            groundtruth_ssi_list, lcs_paths_list, article_lcs_paths_list = get_simple_source_indices_list(
                groundtruth_summ_sent_tokens, article_sent_tokens, None, sentence_limit, min_matched_tokens)
            groundtruth_highlighted_html = html_highlight_sents_in_article(
                groundtruth_summ_sent_tokens, groundtruth_ssi_list, article_sent_tokens,
                lcs_paths_list=lcs_paths_list, article_lcs_paths_list=article_lcs_paths_list,
                doc_indices=doc_indices)
            all_html = '<u>System Summary</u><br><br>' + extracted_sents_in_article_html + '<u>Groundtruth Summary</u><br><br>' + groundtruth_highlighted_html
            write_highlighted_html(all_html, html_dir, example_idx)
    rouge_functions.write_for_rouge(groundtruth_summ_sents, summary_sents, example_idx, ref_dir, dec_dir)
    return (groundtruth_similar_source_indices_list, similar_source_indices_list, ssi_length_extractive)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 15 01:03:18 2017

@author: ujjaldas223
"""
from util import reorder

print reorder('What were you doing yesterday')
print reorder('When are you going to school')
print reorder('I am going to school')
def surrounding_points(point: tuple, delta: float, data: list, reordered_data: list) -> list:
    """
    Given a point, a delta, and the set of points to work with, return a list of
    tuples containing all surrounding points. This should work for both bases
    and demands.
    """
    x = point[0]
    y = point[1]
    index_of_center = 0
    count_found = 0
    print_found = True
    # print(point)
    # let's find the surrounding x coordinates
    if reordered_data:
        x_ordered = reordered_data
    else:
        x_ordered = util.reorder(data, 0)
    for i in range(len(x_ordered)):
        if x_ordered[i][0] == x:
            index_of_center = i
            break
    if not index_of_center:
        # print(x_ordered)
        while x_ordered[index_of_center][0] < x:
            if index_of_center == len(x_ordered) - 1:
                break
            # print(index_of_center, x_ordered[index_of_center][0], x)
            index_of_center += 1
        # print(index_of_center)
    i = index_of_center
    # Go forwards and backwards. For each point, calculate the distance.
    # If the distance exceeds the delta, then stop.
    curr_x = index_of_center + 1
    all_the_surrounding_points = []
    while True:
        if curr_x < 0 or curr_x >= len(x_ordered):
            break
        # Check whether the x difference alone already exceeds delta
        if util.dist(x_ordered[i], (x_ordered[curr_x][0], x_ordered[i][1])) > delta:
            break
        distance = util.dist((x, y), x_ordered[curr_x])
        if distance < delta:
            if print_found:
                print("Found: ", (x, y), x_ordered[curr_x], "\tDistance:\t", distance)
            count_found += 1
            all_the_surrounding_points.append(x_ordered[curr_x])
        curr_x += 1
    curr_x = index_of_center - 1
    # Identical loop, but going the other direction
    while True:
        if curr_x < 0 or curr_x >= len(x_ordered):
            break
        if util.dist(x_ordered[i], (x_ordered[curr_x][0], x_ordered[i][1])) > delta:
            break
        distance = util.dist(x_ordered[i], x_ordered[curr_x])
        if distance < delta:
            if print_found:
                print("Found: ", x_ordered[i], x_ordered[curr_x], "\tDistance:\t", distance)
            count_found += 1
            all_the_surrounding_points.append(x_ordered[curr_x])
        curr_x -= 1
    print(count_found, (x, y), all_the_surrounding_points)
    print("\n")
    return all_the_surrounding_points
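# A quick usage sketch for surrounding_points. It assumes util.dist is Euclidean
# distance and that util.reorder(data, 0) sorts the points by column 0 (their x
# coordinate); neither helper is defined in this file, so pre-sorted data is
# passed explicitly here.

if __name__ == '__main__':
    points = [(0.0, 0.0), (1.0, 0.0), (1.5, 0.5), (4.0, 0.0)]
    # With delta = 1.0, only (1.5, 0.5) lies strictly within 1.0 of (1.0, 0.0);
    # (0.0, 0.0) is exactly 1.0 away and the comparison is strict.
    neighbors = surrounding_points((1.0, 0.0), 1.0, points, sorted(points))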
def main():
    #============================================================================
    #--1.Preparation for adaboost learning framework with RF as base learner.
    maxnum_iters = 10
    filter_idx_set = set([11, 14, 64, 16])
    paths = json.loads(open("SETTINGS.json").read())

    print("Getting features for deleted papers from the disk files")
    features_conf = [feature for feature in
                     csv.reader(open(paths["trainpos_features"]))]
    features_deleted = [feature for feature in
                        csv.reader(open(paths["trainneg_features"]))]
    train_author_confirmed = get_train_confirmed([pair[:2] for pair in features_conf])
    train_features = [map(float, x[2:]) for x in features_deleted + features_conf]
    train_author_paper_ids = [x[:2] for x in features_deleted + features_conf]
    train_features = feature_selection(train_features, filter_idx_set)
    train_target = [0 for x in range(len(features_deleted))] + \
                   [1 for x in range(len(features_conf))]
    train_labels = np.array([-1 for x in range(len(features_deleted))] +
                            [1 for x in range(len(features_conf))])

    features_valid = [feature for feature in
                      csv.reader(open(paths["vali_features"]))]
    test_features = [map(float, x[2:]) for x in features_valid]
    test_features = feature_selection(test_features, filter_idx_set)
    test_author_confirmed = get_confirmed_paper(paths["vali_solution"])
    test_author_paper_ids = [x[:2] for x in features_valid]
    #============================================================================

    #============================================================================
    #--2.Start of adaboost learning framework
    # initialize the importance distribution of data points
    trdata_importance = np.array([1.0 / len(train_target) for i in
                                  range(len(train_target))])
    model_weights = np.array([0.0 for i in range(maxnum_iters)])
    classifier_set = []
    #############################################################################
    #--Hyperparameter tuning: the best number of base learners
    max_map_val = 0.0
    bestnum_baselearner_map = 0
    #############################################################################
    print("Start adaboost learning loops.")
    for i in range(maxnum_iters):
        classifier = RandomForestClassifier(n_estimators=50, verbose=2, n_jobs=4,
                                            min_samples_split=10, random_state=1)
        if i == 0:
            classifier.fit(train_features, train_target)
        else:
            classifier.fit(train_features, train_target, trdata_importance)
        train_predictions = classifier.predict_proba(train_features)[:, 1]
        classifier_set.append(classifier)
        ########################################################################
        #--Hyperparameter tuning: the best number of base learners
        # the first method to calculate error rate: absolute value difference
        error_rate = np.dot(trdata_importance,
                            np.abs(train_target - train_predictions))
        # the second method: negative MAP value
        '''author_predictions = defaultdict(list)
        paper_predictions = {}
        for (a_id, p_id), pred, label in zip(train_author_paper_ids,
                                             train_predictions, train_target):
            author_predictions[int(a_id)].append([pred, int(p_id), label])
        for author_id in sorted(author_predictions):
            author_predictions[author_id] = reorder(author_predictions[author_id])
            paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
            paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]
        map_val = calcMAP(paper_predictions, train_author_confirmed)
        error_rate = 1 - map_val'''
        # the third method: approximate misclassification error
        '''delta = 0.05
        error_rate = np.dot(trdata_importance,
                            np.abs(train_target - train_predictions) > delta)'''
        ########################################################################
        print "error rate: %f" % error_rate
        model_weights[i] = 1.0 / 5 * np.log((1.0 - error_rate) / error_rate)
        print model_weights[i]
        raw_input()
        # model_weights = model_weights / np.sum(model_weights)
        # map probabilities [0, 1] onto [-1, 1] to match the -1/+1 labels
        conv_predictions = np.array([(pred - 0.5) * 2 for pred in train_predictions])
        # for j in range(len(conv_predictions)):
        #     if conv_predictions[j] > 0:
        #         conv_predictions[j] = 1
        #     else:
        #         conv_predictions[j] = -1
        trdata_importance = trdata_importance * np.exp(-model_weights[i] *
                                                       train_labels * conv_predictions)
        trdata_importance = trdata_importance / np.sum(trdata_importance)
        ########################################################################
        #--Hyperparameter tuning: the best number of base learners
        test_predictions = np.array([0.0 for j in range(len(test_features))])
        for j in range(i + 1):
            test_predictions = [pred1 + pred2 for pred1, pred2 in
                                zip(test_predictions, model_weights[j] *
                                    classifier_set[j].predict_proba(test_features)[:, 1])]
        author_predictions = defaultdict(list)
        paper_predictions = {}
        for (a_id, p_id), pred in zip(test_author_paper_ids, test_predictions):
            if p_id in test_author_confirmed[int(a_id)]:
                author_predictions[int(a_id)].append([pred, int(p_id), 1])
            else:
                author_predictions[int(a_id)].append([pred, int(p_id), 0])
        for author_id in sorted(author_predictions):
            author_predictions[author_id] = reorder(author_predictions[author_id])
            paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
            paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]
        print("Test the prediction results with MAP metric.")
        map_val = calcMAP(paper_predictions, test_author_confirmed)
        print "Iteration #%d: MAP value --> %f" % (i + 1, map_val)
        if map_val > max_map_val:
            max_map_val = map_val
            bestnum_baselearner_map = i + 1
        print "Best MAP value --> %f, best number of learners --> %d\n" \
              % (max_map_val, bestnum_baselearner_map)
        # raw_input()
        ########################################################################
    model_weights = model_weights / np.sum(model_weights)
    #============================================================================

    #============================================================================
    #--3.Prediction results on test data
    test_predictions = np.dot(model_weights,
                              np.array([classifier.predict_proba(test_features)[:, 1]
                                        for classifier in classifier_set]))
    author_predictions = defaultdict(list)
    paper_predictions = {}
    for (a_id, p_id), pred in zip(test_author_paper_ids, test_predictions):
        if p_id in test_author_confirmed[int(a_id)]:
            author_predictions[int(a_id)].append([pred, int(p_id), 1])
        else:
            author_predictions[int(a_id)].append([pred, int(p_id), 0])
    for author_id in sorted(author_predictions):
        author_predictions[author_id] = reorder(author_predictions[author_id])
        paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
        paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]
    print("Test the prediction results with MAP metric.")
    map_val = calcMAP(paper_predictions, test_author_confirmed)
    print("Final MAP value: %f" % map_val)
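# For reference: the classical discrete-AdaBoost model weight is
# alpha = 0.5 * ln((1 - error) / error), whereas the loop above damps it with a
# 1/5 factor; the code does not say whether that is deliberate tuning. A minimal
# standalone sketch of the textbook weight and importance update (the names here
# are illustrative, not taken from the code above):

import numpy as np


def adaboost_update(error_rate, labels, predictions, importance):
    """One textbook AdaBoost step: compute alpha, then reweight and renormalize examples."""
    alpha = 0.5 * np.log((1.0 - error_rate) / error_rate)
    importance = importance * np.exp(-alpha * labels * predictions)
    return alpha, importance / np.sum(importance)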
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    if self._example_generator is None:
        input_gen = self.text_generator(
            data.example_generator(self._data_path, self._single_pass, self._cnn_500_dm_500,
                                   is_original=('with_coref' not in self._data_path)))
    else:
        input_gen = self.text_generator(self._example_generator)
    if self._hps.pg_mmr and self._hps.ssi_data_path != '':
        # if using pg_mmr and bert
        print(util.bcolors.OKGREEN + "Loading SSI from BERT at %s"
              % os.path.join(self._hps.ssi_data_path, 'ssi.pkl') + util.bcolors.ENDC)
        with open(os.path.join(self._hps.ssi_data_path, 'ssi.pkl')) as f:
            ssi_triple_list = pickle.load(f)
        # ssi_list = [ssi_triple[1] for ssi_triple in ssi_triple_list]
    else:
        ssi_triple_list = None
    counter = 0
    while True:
        try:
            # read the next example from file. article and abstract are both strings.
            (article, abstracts, doc_indices_str, raw_article_sents, ssi,
             article_lcs_paths_list) = next(input_gen)
        except StopIteration:  # if there are no more examples:
            logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                if ssi_triple_list is not None and counter < len(ssi_triple_list):
                    raise Exception('Len of ssi list (%d) is greater than number of examples (%d)'
                                    % (len(ssi_triple_list), counter))
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")

        if ssi_triple_list is not None:
            if counter >= len(ssi_triple_list):
                raise Exception('Len of ssi list (%d) is less than number of examples (>=%d)'
                                % (len(ssi_triple_list), counter))
            ssi_length_extractive = ssi_triple_list[counter][2]
            ssi = ssi_triple_list[counter][1]
            ssi = ssi[:ssi_length_extractive]

        abstracts = [abstract for abstract in abstracts]
        all_abstract_sentences = [[sent.strip() for sent in data.abstract2sents(abstract)]
                                  for abstract in abstracts]
        if len(all_abstract_sentences) != 0:
            abstract_sentences = all_abstract_sentences[0]
        else:
            abstract_sentences = []
        doc_indices = [int(idx) for idx in doc_indices_str.strip().split()]
        # join_separator = ' [SEP] ' if self._hps.sep else ' '
        if self._hps.by_instance:
            # if we are running iteratively on only instances (a singleton/pair + a summary sentence), not the whole article
            for abs_idx, abstract_sentence in enumerate(abstract_sentences):
                inst_ssi = ssi[abs_idx]
                if len(inst_ssi) == 0:
                    continue
                inst_abstract_sentences = abstract_sentence
                inst_raw_article_sents = util.reorder(raw_article_sents, inst_ssi)
                inst_article = ' '.join([' '.join(util.process_sent(sent, whitespace=True))
                                         for sent in inst_raw_article_sents])
                inst_doc_indices = [0] * len(inst_article.split())
                inst_article_lcs_paths_list = article_lcs_paths_list[abs_idx]
                if len(inst_article) == 0:
                    # See https://github.com/abisee/pointer-generator/issues/1
                    logging.warning('Found an example with empty article text. Skipping it.\n*********************************************')
                elif len(inst_article.strip().split()) < 3 and self._hps.skip_with_less_than_3:
                    print('Article has less than 3 tokens, so skipping\n*********************************************')
                elif len(inst_abstract_sentences.strip().split()) < 3 and self._hps.skip_with_less_than_3:
                    print('Abstract has less than 3 tokens, so skipping\n*********************************************')
                else:
                    inst_example = Example(None, [inst_abstract_sentences], all_abstract_sentences, None,
                                           inst_raw_article_sents, None, [inst_article_lcs_paths_list],
                                           self._vocab, self._hps)
                    self._example_queue.put(inst_example)
        else:
            example = Example(None, abstract_sentences, all_abstract_sentences, None, raw_article_sents,
                              ssi, article_lcs_paths_list, self._vocab, self._hps)  # Process into an Example.
            self._example_queue.put(example)  # place the Example in the example queue.
        # print "example num", counter
        counter += 1
""" Created on Tue Nov 21 14:20:52 2017 @author: ayesha """ from util import reorder import json from past import get_verb as get_past_verb from future import get_verb as get_future_verb from present_tense import get_verb as get_present_verb from bibhakti import bibhakti from phonetics import phonetics import re inpt = 'I am going to school with rama' #print inpt l, sen_tag = reorder(inpt) #print l #print 222 output = [l[0]] for item in l: if re.search(r'(?:VB|MD)', item[0]): output.append(item) #tense = {'VBD', 'MD', 'VBP' or 'VBZ'} #print 111 #print output for item in output: if item[0] == 'VBD': p, c = get_past_verb(output) break elif item[0] == 'MD':
def decode_iteratively(self, example_generator, total, names_to_types, ssi_list, hps):
    attn_vis_idx = 0
    for example_idx, example in enumerate(tqdm(example_generator, total=total)):
        raw_article_sents, groundtruth_similar_source_indices_list, groundtruth_summary_text, corefs, groundtruth_article_lcs_paths_list = util.unpack_tf_example(example, names_to_types)
        article_sent_tokens = [util.process_sent(sent) for sent in raw_article_sents]
        groundtruth_summ_sents = [[sent.strip() for sent in groundtruth_summary_text.strip().split('\n')]]
        groundtruth_summ_sent_tokens = [sent.split(' ') for sent in groundtruth_summ_sents[0]]

        if ssi_list is None:
            # this is if we are doing the upper bound evaluation (ssi_list comes straight from the groundtruth)
            sys_ssi = groundtruth_similar_source_indices_list
            sys_alp_list = groundtruth_article_lcs_paths_list
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 2)
            sys_ssi, sys_alp_list = util.replace_empty_ssis(sys_ssi, raw_article_sents, sys_alp_list=sys_alp_list)
        else:
            gt_ssi, sys_ssi, ext_len, sys_token_probs_list = ssi_list[example_idx]
            sys_alp_list = ssi_functions.list_labels_from_probs(sys_token_probs_list, FLAGS.tag_threshold)
            if FLAGS.singles_and_pairs == 'singles':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 1)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 1)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 1)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 1)
            elif FLAGS.singles_and_pairs == 'both':
                sys_ssi = util.enforce_sentence_limit(sys_ssi, 2)
                sys_alp_list = util.enforce_sentence_limit(sys_alp_list, 2)
                groundtruth_similar_source_indices_list = util.enforce_sentence_limit(
                    groundtruth_similar_source_indices_list, 2)
                gt_ssi = util.enforce_sentence_limit(gt_ssi, 2)
            # if gt_ssi != groundtruth_similar_source_indices_list:
            #     raise Exception('Example %d has different groundtruth source indices: ' + str(groundtruth_similar_source_indices_list) + ' || ' + str(gt_ssi))

        if FLAGS.dataset_name == 'xsum':
            sys_ssi = [sys_ssi[0]]

        final_decoded_words = []
        final_decoded_outputs = ''
        best_hyps = []
        highlight_html_total = '<u>System Summary</u><br><br>'
        for ssi_idx, ssi in enumerate(sys_ssi):
            # selected_article_lcs_paths = None
            selected_article_lcs_paths = sys_alp_list[ssi_idx]
            ssi, selected_article_lcs_paths = util.make_ssi_chronological(ssi, selected_article_lcs_paths)
            selected_article_lcs_paths = [selected_article_lcs_paths]
            selected_raw_article_sents = util.reorder(raw_article_sents, ssi)
            selected_article_text = ' '.join([' '.join(sent) for sent in util.reorder(article_sent_tokens, ssi)])
            selected_doc_indices_str = '0 ' * len(selected_article_text.split())
            if FLAGS.upper_bound:
                selected_groundtruth_summ_sent = [[groundtruth_summ_sents[0][ssi_idx]]]
            else:
                selected_groundtruth_summ_sent = groundtruth_summ_sents
            batch = create_batch(selected_article_text, selected_groundtruth_summ_sent, selected_doc_indices_str,
                                 selected_raw_article_sents, selected_article_lcs_paths,
                                 FLAGS.batch_size, hps, self._vocab)

            original_article = batch.original_articles[0]  # string
            original_abstract = batch.original_abstracts[0]  # string
            article_withunks = data.show_art_oovs(original_article, self._vocab)  # string
            abstract_withunks = data.show_abs_oovs(original_abstract, self._vocab,
                                                   (batch.art_oovs[0] if FLAGS.pointer_gen else None))  # string

            if FLAGS.first_intact and ssi_idx == 0:
                decoded_words = selected_article_text.strip().split()
                decoded_output = selected_article_text
            else:
                decoded_words, decoded_output, best_hyp = decode_example(self._sess, self._model, self._vocab,
                                                                         batch, example_idx, hps)
                best_hyps.append(best_hyp)
            final_decoded_words.extend(decoded_words)
            final_decoded_outputs += decoded_output

            if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
                min_matched_tokens = 2
                selected_article_sent_tokens = [util.process_sent(sent) for sent in selected_raw_article_sents]
                highlight_summary_sent_tokens = [decoded_words]
                highlight_ssi_list, lcs_paths_list, highlight_article_lcs_paths_list, highlight_smooth_article_lcs_paths_list = ssi_functions.get_simple_source_indices_list(
                    highlight_summary_sent_tokens, selected_article_sent_tokens, None, 2, min_matched_tokens)
                highlighted_html = ssi_functions.html_highlight_sents_in_article(
                    highlight_summary_sent_tokens, highlight_ssi_list, selected_article_sent_tokens,
                    lcs_paths_list=lcs_paths_list,
                    article_lcs_paths_list=highlight_smooth_article_lcs_paths_list)
                highlight_html_total += highlighted_html + '<br>'

            if FLAGS.attn_vis and example_idx < 200:
                # write info to .json file for visualization tool
                self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words,
                                       best_hyp.attn_dists, best_hyp.p_gens, attn_vis_idx)
                attn_vis_idx += 1

            if len(final_decoded_words) >= 100:
                break

        gt_ssi_list, gt_alp_list = util.replace_empty_ssis(groundtruth_similar_source_indices_list,
                                                           raw_article_sents,
                                                           sys_alp_list=groundtruth_article_lcs_paths_list)
        highlight_html_gt = '<u>Reference Summary</u><br><br>'
        for ssi_idx, ssi in enumerate(gt_ssi_list):
            selected_article_lcs_paths = gt_alp_list[ssi_idx]
            try:
                ssi, selected_article_lcs_paths = util.make_ssi_chronological(ssi, selected_article_lcs_paths)
            except:
                util.print_vars(ssi, example_idx, selected_article_lcs_paths)
                raise
            selected_raw_article_sents = util.reorder(raw_article_sents, ssi)

            if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
                min_matched_tokens = 2
                selected_article_sent_tokens = [util.process_sent(sent) for sent in selected_raw_article_sents]
                highlight_summary_sent_tokens = [groundtruth_summ_sent_tokens[ssi_idx]]
                highlight_ssi_list, lcs_paths_list, highlight_article_lcs_paths_list, highlight_smooth_article_lcs_paths_list = ssi_functions.get_simple_source_indices_list(
                    highlight_summary_sent_tokens, selected_article_sent_tokens, None, 2, min_matched_tokens)
                highlighted_html = ssi_functions.html_highlight_sents_in_article(
                    highlight_summary_sent_tokens, highlight_ssi_list, selected_article_sent_tokens,
                    lcs_paths_list=lcs_paths_list,
                    article_lcs_paths_list=highlight_smooth_article_lcs_paths_list)
                highlight_html_gt += highlighted_html + '<br>'

        if example_idx < 100 or (example_idx >= 2000 and example_idx < 2100):
            self.write_for_human(raw_article_sents, groundtruth_summ_sents, final_decoded_words, example_idx)
            highlight_html_total = ssi_functions.put_html_in_two_columns(highlight_html_total, highlight_html_gt)
            ssi_functions.write_highlighted_html(highlight_html_total, self._highlight_dir, example_idx)
        # if example_idx % 100 == 0:
        #     attn_dir = os.path.join(self._decode_dir, 'attn_vis_data')
        #     attn_selections.process_attn_selections(attn_dir, self._decode_dir, self._vocab)

        # write ref summary and decoded summary to file, to eval with pyrouge later
        rouge_functions.write_for_rouge(groundtruth_summ_sents, None, example_idx, self._rouge_ref_dir,
                                        self._rouge_dec_dir, decoded_words=final_decoded_words, log=False)
        # if FLAGS.attn_vis:
        #     self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words,
        #                            best_hyp.attn_dists, best_hyp.p_gens, example_idx)
        example_idx += 1  # this is how many examples we've decoded

    logging.info("Decoder has finished reading dataset for single_pass.")
    logging.info("Output has been saved in %s and %s.", self._rouge_ref_dir, self._rouge_dec_dir)
    if len(os.listdir(self._rouge_ref_dir)) != 0:
        if FLAGS.dataset_name == 'xsum':
            l_param = 100
        else:
            l_param = 100
        logging.info("Now starting ROUGE eval...")
        results_dict = rouge_functions.rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir, l_param=l_param)
        rouge_functions.rouge_log(results_dict, self._decode_dir)
def generate_summary(article_sent_tokens, qid_ssi_to_importances, example_idx,
                     qid_ssi_to_token_scores_and_mappings):
    qid = example_idx
    summary_sent_tokens = []
    summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
    already_used_source_indices = []
    similar_source_indices_list = []
    summary_sents_for_html = []
    article_lcs_paths_list = []
    token_probs_list = []
    ssi_length_extractive = None
    while len(summary_tokens) < 300:
        if len(summary_tokens) >= l_param and ssi_length_extractive is None:
            ssi_length_extractive = len(similar_source_indices_list)
        # if FLAGS.dataset_name == 'xsum' and len(summary_tokens) > 0:
        #     ssi_length_extractive = len(similar_source_indices_list)
        #     break
        if FLAGS.use_mmr:
            score_dict = util.calc_MMR_source_indices(article_sent_tokens, summary_tokens, None,
                                                      qid_ssi_to_importances, qid=qid)
        else:
            score_dict = qid_ssi_to_importances[qid]
        sents, source_indices = get_best_source_sents(article_sent_tokens, score_dict, already_used_source_indices)
        if len(source_indices) == 0:
            break
        token_scores, token_mappings = get_token_info_for_ssi(qid_ssi_to_token_scores_and_mappings,
                                                              qid, source_indices)
        # if np.max(token_mappings) !=
        token_cons_scores = consolidate_token_scores(token_scores, token_mappings)
        if len(token_cons_scores) != len(sents):
            print(token_cons_scores, sents)
            raise Exception('Len of token_cons_scores %d != Len of sents %d'
                            % (len(token_cons_scores), len(sents)))
        # We need to pad the scores, because sometimes the instance was too long for BERT,
        # so it got truncated. Fill the end of each sentence with 0 probabilities.
        padded_token_cons_scores = []
        for sent_idx, sent_scores in enumerate(token_cons_scores):
            sent = sents[sent_idx]
            if len(sent_scores) > len(sent):
                print(token_cons_scores, sents)
                raise Exception('Len of sent_scores %d > Len of sent %d' % (len(sent_scores), len(sent)))
            while len(sent_scores) < len(sent):
                sent_scores.append(0.)
            padded_token_cons_scores.append(sent_scores)
        token_probs_list.append(padded_token_cons_scores)
        # shape (1 or 2, len(sent)), depending on whether it is a singleton or a pair
        token_tags = threshold_token_scores(padded_token_cons_scores, FLAGS.tag_threshold)
        article_lcs_paths = ssi_functions.binary_tags_to_list(token_tags)
        article_lcs_paths_list.append(article_lcs_paths)
        # if FLAGS.tag_tokens and FLAGS.tag_loss_wt != 0:
        #     sents_only_tagged = filter_untagged(sents, token_tags)
        #     summary_sent_tokens.extend(sents_only_tagged)
        # else:
        summary_sent_tokens.extend(sents)
        summary_tokens = util.flatten_list_of_lists(summary_sent_tokens)
        similar_source_indices_list.append(source_indices)
        summary_sents_for_html.append(' <br> '.join([' '.join(sent) for sent in sents]))
        if filter_sentences:
            already_used_source_indices.extend(source_indices)
    if ssi_length_extractive is None:
        ssi_length_extractive = len(similar_source_indices_list)
    selected_article_sent_indices = util.flatten_list_of_lists(similar_source_indices_list[:ssi_length_extractive])
    summary_sents = [' '.join(sent) for sent in util.reorder(article_sent_tokens, selected_article_sent_indices)]
    # summary = '\n'.join([' '.join(tokens) for tokens in summary_sent_tokens])
    return summary_sents, similar_source_indices_list, summary_sents_for_html, ssi_length_extractive, \
        article_lcs_paths_list, token_probs_list
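# generate_summary above relies on threshold_token_scores (and helpers like
# consolidate_token_scores) defined elsewhere. From the call site, per-token
# probabilities go in and binary keep/drop tags come out, cut at
# FLAGS.tag_threshold. A minimal sketch of the thresholding step, assumed from
# usage rather than taken from the actual source:

def threshold_token_scores(token_cons_scores, tag_threshold):
    """Binarize per-token scores: 1 if score >= tag_threshold else 0 (assumed behavior).

    token_cons_scores holds one list of floats per selected source sentence
    (one list for a singleton, two for a pair), so the output keeps that shape.
    """
    return [[1 if score >= tag_threshold else 0 for score in sent_scores]
            for sent_scores in token_cons_scores]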
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 15 01:03:18 2017

@author: akankshya
"""
from util import reorder
import re

## print reorder('What were you doing yesterday')
## print reorder('When are you going to school')

l = reorder('i was going to school')
# print l
output = [l[0]]
for item in l:
    if re.search(r'(?:VB|MD)', item[0]):
        output.append(item)
print output
# output = filter(output, None)
# print output

def ret_out():
    return output
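# --------------------------------------------------------------------------
# Illustration of the filter above, assuming reorder() returns (POS-tag, token)
# pairs (Penn Treebank-style tags shown; the actual return format of reorder()
# is not visible in this file, so this is an assumption).
import re
tagged = [('PRP', 'i'), ('VBD', 'was'), ('VBG', 'going'), ('TO', 'to'), ('NN', 'school')]
filtered = [tagged[0]] + [t for t in tagged if re.search(r'(?:VB|MD)', t[0])]
# filtered -> [('PRP', 'i'), ('VBD', 'was'), ('VBG', 'going')]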
def fill_example_queue(self):
    """Reads data from file and processes into Examples which are then placed into the example queue."""
    if self._example_generator is None:
        input_gen = self.text_generator(
            data.example_generator(self._data_path, self._single_pass,
                                   self._cnn_500_dm_500, is_original=False))
    else:
        input_gen = self.text_generator(self._example_generator)
    counter = 0
    while True:
        try:
            # Read the next example from file. article and abstract are both strings.
            (article, abstracts, doc_indices_str, raw_article_sents, ssi) = next(input_gen)
        except StopIteration:  # if there are no more examples:
            logging.info("The example generator for this example queue filling thread has exhausted data.")
            if self._single_pass:
                logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
                self._finished_reading = True
                break
            else:
                raise Exception("single_pass mode is off but the example generator is out of data; error.")
        all_abstract_sentences = [[sent.strip() for sent in data.abstract2sents(abstract)]
                                  for abstract in abstracts]
        if len(all_abstract_sentences) != 0:
            abstract_sentences = all_abstract_sentences[0]
        else:
            abstract_sentences = []
        doc_indices = [int(idx) for idx in doc_indices_str.strip().split()]
        if self._hps.by_instance:
            # Running iteratively on instances (a singleton/pair + a summary sentence), not the whole article
            for abs_idx, abstract_sentence in enumerate(abstract_sentences):
                inst_ssi = ssi[abs_idx]
                if len(inst_ssi) == 0:
                    continue
                inst_abstract_sentences = abstract_sentence
                inst_raw_article_sents = util.reorder(raw_article_sents, inst_ssi)
                inst_article = ' '.join([' '.join(util.process_sent(sent, whitespace=True))
                                         for sent in inst_raw_article_sents])
                inst_doc_indices = [0] * len(inst_article.split())
                if len(inst_article) == 0:
                    # See https://github.com/abisee/pointer-generator/issues/1
                    logging.warning('Found an example with empty article text. Skipping it.\n'
                                    '*********************************************')
                elif len(inst_article.strip().split()) < 3 and self._hps.skip_with_less_than_3:
                    print('Article has less than 3 tokens, so skipping\n*********************************************')
                elif len(inst_abstract_sentences.strip().split()) < 3 and self._hps.skip_with_less_than_3:
                    print('Abstract has less than 3 tokens, so skipping\n*********************************************')
                else:
                    inst_example = Example(inst_article, [inst_abstract_sentences], all_abstract_sentences,
                                           inst_doc_indices, inst_raw_article_sents, None,
                                           self._vocab, self._hps)
                    self._example_queue.put(inst_example)
        else:
            # Process into an Example and place it in the example queue.
            example = Example(article, abstract_sentences, all_abstract_sentences, doc_indices,
                              raw_article_sents, ssi, self._vocab, self._hps)
            self._example_queue.put(example)
        # print "example num", counter
        counter += 1
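# --------------------------------------------------------------------------
# Sketch: fill_example_queue() is designed to run on a background thread that
# feeds a queue consumed by the training loop. The usage below is a simplified
# stand-in for the project's actual batcher setup, not its real API.
from threading import Thread

def start_example_queue_thread(batcher):
    t = Thread(target=batcher.fill_example_queue)
    t.daemon = True  # let the process exit even if the filler is still running
    t.start()
    return t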
import csv
import json
import pickle
from collections import defaultdict

from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
# Project-local helpers (feature_selection, get_confirmed_paper, calcMAP, reorder)
# are assumed to be defined or imported elsewhere in this file.

def main():
    #============================================================================
    #--0. Preparation for training the model.
    maxnum_baselearner = 2
    filter_idx_set = set([11, 14, 64, 16])
    paths = json.loads(open("SETTINGS.json").read())

    print("Getting features for deleted papers from the disk files")
    features_conf = [feature for feature in csv.reader(open(paths["trainpos_features"]))]
    features_deleted = [feature for feature in csv.reader(open(paths["trainneg_features"]))]
    train_features = [map(float, x[2:]) for x in features_deleted + features_conf]
    train_features = feature_selection(train_features, filter_idx_set)
    train_target = [0 for x in range(len(features_deleted))] + \
                   [1 for x in range(len(features_conf))]

    features_valid = [feature for feature in csv.reader(open(paths["vali_features"]))]
    test_features = [map(float, x[2:]) for x in features_valid]
    test_features = feature_selection(test_features, filter_idx_set)
    author_confirmed = get_confirmed_paper(paths["vali_solution"])
    test_author_paper_ids = [x[:2] for x in features_valid]
    #============================================================================

    #============================================================================
    #--2. Model training (random forest + gradient-boosted trees, fit additively).
    base_learner_set = []
    classifier = RandomForestClassifier(n_estimators=360, verbose=2, n_jobs=4,
                                        min_samples_split=10, random_state=1)
    base_learner_set.append(classifier)
    regressor = GradientBoostingRegressor(loss='ls', learning_rate=0.1,
                                          n_estimators=450, max_depth=2)
    base_learner_set.append(regressor)

    max_map_val = 0.0
    bestnum_baselearner_map = 0
    min_mse_val = 1e5
    bestnum_baselearner_mse = 0
    dynamic_target = [i for i in train_target]
    for i in range(maxnum_baselearner):
        train_predictions = [0.0 for j in range(len(train_target))]
        test_predictions = [0.0 for j in range(len(test_features))]
        # Each new base learner is fit on the residual of the ensemble so far
        base_learner_set[i].fit(train_features, dynamic_target)
        for j in range(i + 1):
            if j == 0:
                unit_predictions = base_learner_set[j].predict_proba(train_features)[:, 1]
            elif j >= 1:
                unit_predictions = base_learner_set[j].predict(train_features)
            print unit_predictions[0:5]
            print unit_predictions[100:105]
            train_predictions = [pred1 + pred2 for pred1, pred2 in
                                 zip(train_predictions, list(unit_predictions))]
            print train_predictions[0:5]
            print train_predictions[100:105]
            if j == 0:
                unit_predictions = base_learner_set[j].predict_proba(test_features)[:, 1]
            elif j >= 1:
                unit_predictions = base_learner_set[j].predict(test_features)
            test_predictions = [pred1 + pred2 for pred1, pred2 in
                                zip(test_predictions, list(unit_predictions))]
        dynamic_target = [target - pred for target, pred in
                          zip(train_target, train_predictions)]
        print dynamic_target[:10]

        author_predictions = defaultdict(list)
        paper_predictions = {}
        mse_predictions = []
        mse_labels = []
        for (a_id, p_id), pred in zip(test_author_paper_ids, test_predictions):
            if p_id in author_confirmed[int(a_id)]:
                author_predictions[int(a_id)].append([pred, int(p_id), 1])
                mse_labels.append(1)
            else:
                author_predictions[int(a_id)].append([pred, int(p_id), 0])
                mse_labels.append(0)
            mse_predictions.append(pred)
        for author_id in sorted(author_predictions):
            author_predictions[author_id] = reorder(author_predictions[author_id])
            paper_ids_sorted = sorted(author_predictions[author_id], reverse=True)
            paper_predictions[author_id] = [x[1] for x in paper_ids_sorted]

        print("Test the prediction results with the MAP metric.")
        map_val = calcMAP(paper_predictions, author_confirmed)
        mse_val = mean_squared_error(mse_labels, mse_predictions)
        print "Iteration #%d: MAP value --> %f, MSE value --> %f\n" % (i + 1, map_val, mse_val)
        if map_val > max_map_val:
            max_map_val = map_val
            bestnum_baselearner_map = i + 1
            print "Best MAP value --> %f, best number of learners --> %d\n" \
                % (max_map_val, bestnum_baselearner_map)
        if mse_val < min_mse_val:
            min_mse_val = mse_val
            bestnum_baselearner_mse = i + 1
            print "Best MSE value --> %f, best number of learners --> %d\n" \
                % (min_mse_val, bestnum_baselearner_mse)

    pickle.dump(base_learner_set[0], open('./rf-model.pickle', "w"))
    pickle.dump(base_learner_set[1], open('./gbrt-model.pickle', "w"))
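# --------------------------------------------------------------------------
# The loop above fits each base learner on the residual of the ensemble so far
# (a boosting-style scheme). A self-contained illustration of that idea on
# synthetic data; the sklearn classes are real, everything else is made up.
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = X[:, 0] + 0.1 * rng.randn(200)

learners = [RandomForestRegressor(n_estimators=50, random_state=0),
            GradientBoostingRegressor(n_estimators=50)]
target = y.copy()
ensemble_pred = np.zeros_like(y)
for model in learners:
    model.fit(X, target)              # fit on what is still unexplained
    ensemble_pred += model.predict(X)
    target = y - ensemble_pred        # residual becomes the next learner's target
print("train MSE:", np.mean((y - ensemble_pred) ** 2))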
import time

import cv2
import numpy as np
import requests

import util
# Canny_detector and tess (OCR helper) are assumed to be defined elsewhere in this project.

def autoprocess(cap, src):
    ########################################################################
    # usercount = os.getenv('COUNT')
    pathImage = "1.jpg"
    # cap = cv2.VideoCapture(0)
    # url = "http://192.168.1.18:8080/shot.jpg"
    # cap = cv2.VideoCapture(url)
    cap.set(10, 160)
    heightImg = 640
    widthImg = 480
    url = 'http://192.168.43.1:8080/shot.jpg'
    ########################################################################
    util.initializeTrackbars()
    count = 0

    while True:
        if src == 2:
            # Grab a frame from the IP camera feed
            img_resp = requests.get(url)
            img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)
            img = cv2.imdecode(img_arr, -1)
        elif src == 1:
            ret, img = cap.read()
        else:
            img = cv2.imread(pathImage)
        img = cv2.resize(img, (widthImg, heightImg))  # RESIZE IMAGE
        imgBlank = np.zeros((heightImg, widthImg, 3), np.uint8)  # BLANK IMAGE FOR TESTING/DEBUGGING IF REQUIRED
        imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # CONVERT IMAGE TO GRAY SCALE
        imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1)  # ADD GAUSSIAN BLUR
        thres = util.valTrackbars()  # GET TRACKBAR VALUES FOR THRESHOLDS
        imgThreshold = cv2.Canny(imgBlur, thres[0], thres[1])  # APPLY CANNY EDGE DETECTION
        kernel = np.ones((5, 5))
        imgDial = cv2.dilate(imgThreshold, kernel, iterations=2)  # APPLY DILATION
        imgThreshold = cv2.erode(imgDial, kernel, iterations=1)  # APPLY EROSION

        # FIND ALL CONTOURS
        imgContours = img.copy()  # COPY IMAGE FOR DISPLAY PURPOSES
        imgBigContour = img.copy()  # COPY IMAGE FOR DISPLAY PURPOSES
        contours, hierarchy = cv2.findContours(imgThreshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 10)  # DRAW ALL DETECTED CONTOURS

        # FIND THE BIGGEST CONTOUR
        biggest, maxArea = util.biggestContour(contours)
        if biggest.size != 0:
            biggest = util.reorder(biggest)
            cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20)  # DRAW THE BIGGEST CONTOUR
            imgBigContour = util.drawRectangle(imgBigContour, biggest, 2)
            pts1 = np.float32(biggest)  # PREPARE POINTS FOR WARP
            pts2 = np.float32([[0, 0], [widthImg, 0], [0, heightImg], [widthImg, heightImg]])
            matrix = cv2.getPerspectiveTransform(pts1, pts2)
            imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

            # REMOVE 20 PIXELS FROM EACH SIDE
            imgWarpColored = imgWarpColored[20:imgWarpColored.shape[0] - 20,
                                            20:imgWarpColored.shape[1] - 20]
            imgWarpColored = cv2.resize(imgWarpColored, (widthImg, heightImg))

            # APPLY ADAPTIVE THRESHOLD
            imgWarpGray = cv2.cvtColor(imgWarpColored, cv2.COLOR_BGR2GRAY)
            imgAdaptiveThre = cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2)
            imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
            imgAdaptiveThre = cv2.medianBlur(imgAdaptiveThre, 3)

            canny_img = Canny_detector(img)
            tess(imgWarpColored)

            # Image array for display
            # imageArray = ([img, imgGray, imgThreshold, imgContours],
            #               [imgBigContour, imgWarpColored, imgWarpGray, imgAdaptiveThre])
            imageArray = ([img, imgContours],
                          [imgBigContour, imgAdaptiveThre])
            if count <= 0:
                cv2.imwrite("doc.jpeg", imgAdaptiveThre)
                count += 1
            cases = 1
            labels = [["Original", "Contours"],
                      ["Biggest Contour", "Adaptive Threshold"]]
            stackedImage = util.stackImages(imageArray, 0.75, labels)
            cv2.imshow("Result", stackedImage)
            # SAVE IMAGE WHEN 's' KEY IS PRESSED
            if cases == 1 and cv2.waitKey(25) & 0xFF == ord('s'):
                cv2.imwrite("auto/autodoc" + str(time.time()) + ".jpg", imgAdaptiveThre)
                print("saved")
        else:
            imageArray = ([img, imgContours],
                          [img, img])
            labels = [["Original", "Contours"],
                      ["No Contour", "No Adaptive Threshold"]]
            cases = 2
            stackedImage = util.stackImages(imageArray, 0.75, labels)
            cv2.imshow("Result", stackedImage)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            cv2.rectangle(stackedImage,
                          ((int(stackedImage.shape[1] / 2) - 230), int(stackedImage.shape[0] / 2) + 50),
                          (1100, 350), (0, 255, 0), cv2.FILLED)
            cv2.putText(stackedImage, "Scan Saved",
                        (int(stackedImage.shape[1] / 2) - 200, int(stackedImage.shape[0] / 2)),
                        cv2.FONT_HERSHEY_DUPLEX, 3, (0, 0, 255), 5, cv2.LINE_AA)
            cv2.imshow('Result', stackedImage)
            cv2.waitKey(300)
        print(cases)