def write_overall_word_counts(paras):
    """Write corpus-wide significant-word counts to a CSV file.

    The text of every paragraph yielded by ``paras_without_shlokas(paras)``
    is decoded with ``decoder.text_with_phrases``, the pieces are combined
    into one string (each followed by a single space), and the result of
    ``counter.count_significant_words`` on that string is written to
    ``GitaBhashya-try-counts.csv``.

    Args:
        paras: Paragraph collection, passed to ``paras_without_shlokas``.
    """
    # Build the corpus text in one pass; repeated ``+=`` on a string is
    # quadratic in the worst case. Each paragraph keeps its trailing space.
    text = ''.join(
        decoder.text_with_phrases(in_para_allcontent.contentlist(para)) + ' '
        for para in paras_without_shlokas(paras)
    )
    word_counts = counter.count_significant_words(text)
    write_wordcounts_as_csv('GitaBhashya-try-counts.csv', word_counts)
    print("Wrote counts to GitaBhashya-try-counts.csv")
def test_link_is_encoded_as_phrase(self):
    """Check phrase contents of 'bookmark with link.docx' against the phrase regex.

    For each paragraph of the document, the content items picked with the
    predicate ``type == "phrase"`` are matched against
    ``in_para_phrase.content_regex``; all match results must be OK.
    """
    def is_phrase(item):
        return item["type"] == "phrase"

    match_results = [
        matcher.match(in_para_phrase.content_regex, phrase)
        for para in paras_from('bookmark with link.docx')
        for phrase in in_para_allcontent.pick_contents(
            in_para_allcontent.contentlist(para), is_phrase)
    ]
    self.assertAllAreOk(match_results)
def test_bookmark_is_encoded_as_anchor(self):
    """Check anchor contents of 'anchor.docx' against the bookmark regex.

    For each paragraph of the document, the content items picked with the
    predicate ``type == "anchor"`` are matched against
    ``in_para_bookmark.content_regex``; all match results must be OK.
    """
    def is_anchor(item):
        return item['type'] == "anchor"

    match_results = [
        matcher.match(in_para_bookmark.content_regex, anchor)
        for para in paras_from('anchor.docx')
        for anchor in in_para_allcontent.pick_contents(
            in_para_allcontent.contentlist(para), is_anchor)
    ]
    self.assertAllAreOk(match_results)
def test_reference_is_encoded_as_external(self):
    """Check extref contents of 'externalref.docx' against the external-ref regex.

    For each paragraph of the document, the content items picked with the
    predicate ``type == "extref"`` are matched against
    ``in_para_externalref.content_regex``; all match results must be OK.
    """
    def is_extref(item):
        return item["type"] == "extref"

    match_results = [
        matcher.match(in_para_externalref.content_regex, extref)
        for para in paras_from('externalref.docx')
        for extref in in_para_allcontent.pick_contents(
            in_para_allcontent.contentlist(para), is_extref)
    ]
    self.assertAllAreOk(match_results)
def test_link_to_html_is_encoded_as_phrase(self):
    """Check phrase contents of the first paragraph of 'link to html.docx'.

    Only the first paragraph returned by ``paras_from`` is inspected. Its
    content items picked with the predicate ``type == "phrase"`` are matched
    against ``in_para_phrase.content_regex``; all match results must be OK.
    """
    def is_phrase(item):
        return item["type"] == "phrase"

    first_para = paras_from('link to html.docx')[0]
    phrases = in_para_allcontent.pick_contents(
        in_para_allcontent.contentlist(first_para), is_phrase)
    match_results = [
        matcher.match(in_para_phrase.content_regex, phrase)
        for phrase in phrases
    ]
    self.assertAllAreOk(match_results)
def write_chapter_wordmap(paras):
    """Write per-chapter significant-word counts as CSV and HTML heatmap.

    Paragraph text (from ``paras_without_shlokas(paras)``) is accumulated per
    ``para['chapter']`` via the ``append`` helper. For each chapter the
    significant words are counted and stored in a DataFrame cell addressed by
    (word, chapter). The frame is passed to
    ``counter.chapter_wordcounts_to_heatmap``, saved to
    ``GitaBhashya-try-chapmap.csv`` and then handed to ``make_html_heatmap``
    with the target ``GitaBhashya-try-heatmap.html``.

    Args:
        paras: Paragraph collection, passed to ``paras_without_shlokas``.
    """
    # Step 1: gather the decoded text of each paragraph under its chapter.
    text_by_chapter = {}
    for para in paras_without_shlokas(paras):
        chapter_key = para['chapter']
        para_text = decoder.text_with_phrases(
            in_para_allcontent.contentlist(para))
        append(text_by_chapter, chapter_key, para_text + ' ')

    # Step 2: fill one column per chapter, one row per counted word.
    counts_frame = pd.DataFrame()
    for chapter, chapter_text in text_by_chapter.items():
        for entry in counter.count_significant_words(chapter_text):
            word, count = entry[0], entry[1]
            counts_frame.at[word, chapter] = count

    # Step 3: emit the outputs in the same order as before.
    counter.chapter_wordcounts_to_heatmap(counts_frame)
    counts_frame.to_csv('GitaBhashya-try-chapmap.csv')
    print("Wrote chapter-wise counts to GitaBhashya-try-chapmap.csv")
    make_html_heatmap(counts_frame, 'GitaBhashya-try-heatmap.html')