def parse():
    """
    Parse one source file per Torah book and key the result by book, parsha
    and the names listed under each parsha.

    Each file yielded by filenames() is read as utf-8 and passed to
    util.file_to_ja together with the u'@88' / u'@44' expressions and
    sefat_parse_helper. The resulting array is cleaned and stored under the
    matching entry of library.get_indexes_in_category('Torah'). The raw
    arrays are then re-keyed twice with util.simple_to_complex: first by the
    parsha names of the book, then, inside every parsha, by the names that
    node_names() holds for that parsha.

    :return: dict keyed by book name; each value is the structure produced by
        util.simple_to_complex for that book, with every parsha entry
        re-keyed in turn.
    """
    book_names = library.get_indexes_in_category('Torah')
    names = node_names()
    parsed = {}

    # Pair every book with its source file directly rather than indexing
    # book_names by position. Files beyond the last book name are ignored.
    for book_name, filename in zip(book_names, filenames()):
        with codecs.open(filename, 'r', 'utf-8') as infile:
            current = util.file_to_ja(
                [[[]]], infile, [u'@88', u'@44'], sefat_parse_helper).array()

        # Patterns handed to the cleaner: any two-digit '@NN' tag and a
        # literal question mark. The backslash is doubled so the literal does
        # not rely on the invalid escape sequence '\?'; the regex text is
        # unchanged.
        parsed[book_name] = util.clean_jagged_array(
            current, [u'@[0-9]{2}', u'\\?'])

    for book in book_names:
        # NOTE(review): the order of these keys is passed straight to
        # simple_to_complex - presumably node_names() returns an ordered
        # mapping; confirm.
        parashot = names[book].keys()
        parsed[book] = util.simple_to_complex(parashot, parsed[book])

        for parsha in parashot:
            parsed[book][parsha] = util.simple_to_complex(
                names[book][parsha], parsed[book][parsha])

    return parsed
def produce_parsed_data(filename):
    """
    Parse a single utf-8 source file into an array indexed by section number.

    The file is scanned twice through the same handle: once by
    util.file_to_ja (with m_pattern and comment_pattern) to build the text,
    and once by util.grab_section_names (with m_pattern) to collect the
    section names. Each name is converted to an integer via
    util.getGematria, the text is keyed by those integers, and the keyed
    structure is converted back to an array.

    :param filename: Path of the file to parse.
    :return: Result of util.convert_dict_to_array for the keyed sections.
    """
    with codecs.open(filename, 'r', 'utf-8') as source:
        jagged = util.file_to_ja(
            3, source, (m_pattern, comment_pattern), nothing)

        # Rewind so the second scan starts from the beginning of the file.
        source.seek(0)
        raw_names = util.grab_section_names(m_pattern, source, 1)

        section_numbers = []
        for raw_name in raw_names:
            section_numbers.append(int(util.getGematria(raw_name)))

        keyed_text = util.simple_to_complex(section_numbers, jagged.array())
        result = util.convert_dict_to_array(keyed_text)

    return result
def produce_parsed_data(filename):
    """
    Read one utf-8 source file and return its text as an array indexed by
    section number.

    util.file_to_ja builds the text from the file using m_pattern and
    comment_pattern; the handle is then rewound and util.grab_section_names
    collects the section names matched by m_pattern. The names are turned
    into integers with util.getGematria and used to key the text before it
    is converted back to an array.

    :param filename: Path of the file to parse.
    :return: Result of util.convert_dict_to_array for the keyed sections.
    """
    expressions = (m_pattern, comment_pattern)

    with codecs.open(filename, 'r', 'utf-8') as handle:
        text = util.file_to_ja([[[]]], handle, expressions, nothing)

        # Second pass over the same handle, so go back to the start first.
        handle.seek(0)
        numbers = []
        for section_name in util.grab_section_names(m_pattern, handle, 1):
            numbers.append(int(util.getGematria(section_name)))

        as_dict = util.simple_to_complex(numbers, text.array())
        output = util.convert_dict_to_array(as_dict)

    return output
def parse():
    """
    Parse one source file per Torah book and key each book's text by parsha.

    Each file yielded by filenames() is paired with the corresponding entry
    of library.get_indexes_in_category('Torah'), read as utf-8 and passed to
    util.file_to_ja together with the u'@88' expression and
    sefat_parse_helper. The resulting array is cleaned and then re-keyed with
    util.simple_to_complex using the parsha names that node_names() holds for
    the book.

    :return: dict keyed by book name; each value is the structure produced by
        util.simple_to_complex for that book.
    """
    book_names = library.get_indexes_in_category('Torah')
    names = node_names()
    parsed = {}

    for book_name, filename in zip(book_names, filenames()):
        with codecs.open(filename, 'r', 'utf-8') as infile:
            current = util.file_to_ja(
                2, infile, [u'@88'], sefat_parse_helper).array()

        # Patterns handed to the cleaner: any two-digit '@NN' tag and a
        # literal question mark. The backslash is doubled so the literal does
        # not rely on the invalid escape sequence '\?'; the regex text is
        # unchanged.
        parsed[book_name] = util.clean_jagged_array(
            current, [u'@[0-9]{2}', u'\\?'])

    for book in book_names:
        # NOTE(review): the order of these keys is passed straight to
        # simple_to_complex - presumably node_names() returns an ordered
        # mapping; confirm.
        parashot = names[book].keys()
        parsed[book] = util.simple_to_complex(parashot, parsed[book])

    return parsed
def align_boaz_chapters(source_file, simple_array):
    """
    Line up a naive Boaz parse with the chapter numbers found in its source.

    Boaz is not guaranteed to have text for every chapter. The chapter
    headings are read from the source file, converted to numbers, and used
    (through the util library) to pad the parsed text with empty sections
    wherever a chapter is missing.

    :param source_file: File from which the chapter numbers are derived.
    :param simple_array: A "naive" parse of the data, structured as a nested
        list.
    :return: Nested array, padded so that empty chapters are accounted for.
    """
    # Heading marker followed by a one- or two-letter Hebrew chapter numeral.
    chapter_pattern = u'@00פרק ([\u05d0-\u05ea]{1,2})'

    # Collect the numeric value of every chapter heading in the source file.
    chapter_numbers = []
    for heading in util.grab_section_names(chapter_pattern, source_file, 1):
        chapter_numbers.append(util.getGematria(heading))

    keyed_chapters = util.simple_to_complex(chapter_numbers, simple_array)
    return util.convert_dict_to_array(keyed_chapters)