Пример #1
0
def write_overall_word_counts(paras):
    """Write word counts for the combined text of ``paras`` to a CSV file.

    Every paragraph yielded by ``paras_without_shlokas(paras)`` is turned
    into text with ``decoder.text_with_phrases`` and followed by a single
    space. The concatenated text is passed to
    ``counter.count_significant_words`` and the result is written to
    ``GitaBhashya-try-counts.csv`` through ``write_wordcounts_as_csv``.
    A confirmation line is printed once the file has been written.

    Args:
        paras: Paragraphs in whatever form ``paras_without_shlokas``
            accepts.
    """
    output_path = 'GitaBhashya-try-counts.csv'
    # Build the text with one join instead of repeated ``+=``, which may
    # re-copy the growing string on every iteration.  Each paragraph keeps
    # its trailing space, so the result is identical to the old loop.
    text = ''.join(
        decoder.text_with_phrases(in_para_allcontent.contentlist(para)) + ' '
        for para in paras_without_shlokas(paras))
    word_counts = counter.count_significant_words(text)
    write_wordcounts_as_csv(output_path, word_counts)
    print("Wrote counts to " + output_path)
Пример #2
0
 def test_link_is_encoded_as_phrase(self):
     # For every paragraph of 'bookmark with link.docx', pick the content
     # items whose "type" is "phrase" and match each one against
     # in_para_phrase.content_regex; all match results must be OK.
     def is_phrase(item):
         return item["type"] == "phrase"

     links_match = [
         matcher.match(in_para_phrase.content_regex, content)
         for para in paras_from('bookmark with link.docx')
         for content in in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), is_phrase)
     ]
     self.assertAllAreOk(links_match)
Пример #3
0
 def test_bookmark_is_encoded_as_anchor(self):
     # For every paragraph of 'anchor.docx', pick the content items whose
     # 'type' is "anchor" and match each one against
     # in_para_bookmark.content_regex; all match results must be OK.
     def is_anchor(item):
         return item['type'] == "anchor"

     anchors_match = [
         matcher.match(in_para_bookmark.content_regex, content)
         for para in paras_from('anchor.docx')
         for content in in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), is_anchor)
     ]
     self.assertAllAreOk(anchors_match)
Пример #4
0
 def test_reference_is_encoded_as_external(self):
     # For every paragraph of 'externalref.docx', pick the content items
     # whose "type" is "extref" and match each one against
     # in_para_externalref.content_regex; all match results must be OK.
     def is_extref(item):
         return item["type"] == "extref"

     extrefs_match = [
         matcher.match(in_para_externalref.content_regex, content)
         for para in paras_from('externalref.docx')
         for content in in_para_allcontent.pick_contents(
             in_para_allcontent.contentlist(para), is_extref)
     ]
     self.assertAllAreOk(extrefs_match)
Пример #5
0
 def test_link_to_html_is_encoded_as_phrase(self):
     # Only the first paragraph of 'link to html.docx' is inspected: its
     # "phrase" content items are matched against
     # in_para_phrase.content_regex and all match results must be OK.
     def is_phrase(item):
         return item["type"] == "phrase"

     first_para = paras_from('link to html.docx')[0]
     first_para_contents = in_para_allcontent.contentlist(first_para)
     links_match = [
         matcher.match(in_para_phrase.content_regex, content)
         for content in in_para_allcontent.pick_contents(
             first_para_contents, is_phrase)
     ]
     self.assertAllAreOk(links_match)
Пример #6
0
def write_chapter_wordmap(paras):
    """Produce chapter-wise word counts and export them in three forms.

    Text from each paragraph yielded by ``paras_without_shlokas(paras)``
    (followed by a space) is collected under ``para['chapter']`` using the
    ``append`` helper. Each chapter's text is then counted with
    ``counter.count_significant_words``; for every returned pair, the first
    element is used as the row label and the second as the cell value in
    the chapter's column of a DataFrame.

    The DataFrame is passed to ``counter.chapter_wordcounts_to_heatmap``,
    saved as ``GitaBhashya-try-chapmap.csv``, and finally handed to
    ``make_html_heatmap`` with ``GitaBhashya-try-heatmap.html``.

    Args:
        paras: Paragraphs in whatever form ``paras_without_shlokas``
            accepts; each yielded paragraph must support ``para['chapter']``.
    """
    texts_by_chapter = {}
    for para in paras_without_shlokas(paras):
        chapter_key = para['chapter']
        para_text = decoder.text_with_phrases(
            in_para_allcontent.contentlist(para)) + ' '
        append(texts_by_chapter, chapter_key, para_text)

    wordmap = pd.DataFrame()
    for chapter, chapter_text in texts_by_chapter.items():
        for pair in counter.count_significant_words(chapter_text):
            # Index rather than unpack so pairs longer than two still work.
            row_label, cell_value = pair[0], pair[1]
            wordmap.at[row_label, chapter] = cell_value

    # Output order is kept: heatmap first, then CSV, then the HTML heatmap.
    counter.chapter_wordcounts_to_heatmap(wordmap)
    wordmap.to_csv('GitaBhashya-try-chapmap.csv')
    print("Wrote chapter-wise counts to GitaBhashya-try-chapmap.csv")
    make_html_heatmap(wordmap, 'GitaBhashya-try-heatmap.html')