def encoding_of_doc_matches(test_docx_filename, expected_json_patterns):
    """Check an encoded test document against a list of regex patterns.

    The file ``test_docx_filename`` is looked up under ``../test_data``
    (a path relative to the current working directory), encoded with
    ``gita_encode.encode_doc`` and serialised with ``json.dumps``. Both the
    serialised document and every pattern are passed through
    ``strip_whitespaces`` before ``re.search`` is applied.

    Every pattern is tried, even after a miss, and each pattern that is not
    found is reported on stdout in its original (unstripped) form.

    Args:
        test_docx_filename: File name of the document inside ``../test_data``.
        expected_json_patterns: Iterable of regular-expression strings.

    Returns:
        True when every pattern was found (or none were given), else False.
    """
    docx_path = os.path.join('../test_data', test_docx_filename)
    encoded_doc = gita_encode.encode_doc(docx_path)
    haystack = strip_whitespaces(json.dumps(encoded_doc))

    all_found = True
    for pattern in expected_json_patterns:
        compact_pattern = strip_whitespaces(pattern)
        if re.search(compact_pattern, haystack) is not None:
            continue
        # Keep going after a miss so that every unmatched pattern is listed.
        print(f'--Regex not matched: {pattern}')
        all_found = False
    return all_found
del content_to_write["book-keep"] verses.append({ "id": "*", "chapter": content["book-keep"]["chapterhead"], "shloka": content["book-keep"]["shlokahead"], "style": "shloka", "type": "text", "content": content_to_write }) paras = docx_as_dict['paragraphs'] content = blank_content() for i in range(len(paras)): content = form_presentable(paras, i, content) if content_boundary(paras, i): last_insight = extract_last_insight(content) add_to_verses(content) content = initial_content({"book-keep": {"lastinsight": last_insight}}) return {"Verses": verses} if __name__ == '__main__': docx_as_dict = gita_encode.encode_doc('GitaBhashya-try.docx') with open('paras.json', 'w') as docx_dict_file: json.dump(docx_as_dict, docx_dict_file, indent=2) print('Wrote docx to paras.json') verse_json = extract_verses(docx_as_dict) with open('verse.json', 'w') as verses_file: json.dump(verse_json, verses_file, indent=2) print("Wrote the verses to verse.json")
# Script: encode the sample chapter document and save the result as JSON.
import os
import json
import gita_encode

# The input path is relative to the current working directory; the output
# file is created in the current working directory as well.
source_docx = os.path.join('../test_data', 'sample chapter.docx')
target_json = 'sample chapter.json'

docx_as_dict = gita_encode.encode_doc(source_docx)

with open(target_json, 'w') as jsonfile:
    # indent=2 gives a pretty-printed dump of the encoded document.
    json.dump(docx_as_dict, jsonfile, indent=2)
def paras_from(docx_filename):
    """Encode a test document and return its ``paralist`` result.

    The file ``docx_filename`` is looked up under ``../test_data`` (a path
    relative to the current working directory) and encoded with
    ``gita_encode.encode_doc``. The encoded structure is handed to
    ``in_para_allcontent.paralist`` and that call's return value is passed
    back unchanged.

    Args:
        docx_filename: File name of the document inside ``../test_data``.

    Returns:
        Whatever ``in_para_allcontent.paralist`` returns for the encoded
        document.
    """
    docx_path = os.path.join('../test_data', docx_filename)
    encoded_doc = gita_encode.encode_doc(docx_path)
    return in_para_allcontent.paralist(encoded_doc)