def test_numeral_tables(tmprepo):
    """Build a NumeralsEntry from the Abui fixture page and inspect it.

    Checks that the first record produced by ``find_tables`` has seven
    items, that the resulting entry holds eight tables, and that one
    specific lexeme is extracted from it.
    """
    catalog = Glottolog(tmprepo['glottolog'])
    abui_page = tmprepo['raw'] / 'Abui.htm'
    record = list(find_tables([abui_page]))[0]
    assert len(record) == 7

    # Map the positional record onto the keyword arguments of NumeralsEntry.
    entry_fields = {
        'base_name': record[0],
        'tables': record[1],
        'file_name': record[2],
        'title_name': record[3],
        'codes': catalog.languoids_by_code(),
        'iso': catalog.iso.languages,
        'source': record[4],
        'base': record[5],
        'comment': record[6],
    }
    entry = NumeralsEntry(**entry_fields)

    assert len(entry.tables) == 8
    lexemes = entry.get_numeral_lexemes()
    assert lexemes[0][0][6][0] == 'tä.ˈlä.mä'
def test_num_entry(tmprepo, x, expected):
    """Check base name and first glottocode of an entry built from file *x*.

    ``x`` is a file name inside the raw HTML fixture directory and
    ``expected`` is the glottocode the entry should list first.
    NOTE(review): both are presumably supplied by a parametrization that is
    not visible in this part of the file - confirm.
    """
    html_dir = tmprepo['raw']
    catalog = Glottolog(tmprepo['glottolog'])
    f = html_dir / x

    # Only the first record yielded for the page is used.
    record = list(find_tables([f]))[0]
    entry = NumeralsEntry(
        base_name=record[0],
        tables=record[1],
        file_name=record[2],
        title_name=record[3],
        codes=catalog.languoids_by_code(),
        iso=catalog.iso.languages,
        source=record[4],
        base=record[5],
        comment=record[6],
    )

    expected_stem = Path(f).stem
    assert entry.base_name == expected_stem

    first_glottocode = entry.glottocodes[0]
    assert first_glottocode == expected
def test_fuzzy_number_matching(tmprepo):
    """Check that ``parse_number`` reads the numeral cells of the Aari page.

    Builds a NumeralsEntry from the first record found in ``Aari.htm``,
    collects the non-empty, stripped cell texts of its second table row by
    row, and verifies the parsed number of four selected cells.
    """
    catalog = Glottolog(tmprepo['glottolog'])
    record = list(find_tables([tmprepo['raw'] / 'Aari.htm']))[0]
    entry = NumeralsEntry(
        base_name=record[0],
        tables=record[1],
        file_name=record[2],
        title_name=record[3],
        codes=catalog.languoids_by_code(),
        iso=catalog.iso.languages,
        source=record[4],
        base=record[5],
        comment=record[6],
    )

    table = entry.tables[1]
    # One list per <tr>, holding the stripped text of every non-empty <td>.
    rows = [
        [text for text in (td.text.strip() for td in tr.find_all('td')) if text]
        for tr in table.find_all('tr')
    ]

    # Table is roughly structured like this:
    # 1  | 21
    # 2  | 22
    # 3  | 23
    # ...
    # 10 | 30
    # ..
    # 20 | 2000
    checks = (
        (0, 0, 1),
        (0, 1, 21),
        (9, 0, 10),
        (19, 1, 2000),
    )
    for row_idx, col_idx, number in checks:
        assert parse_number(rows[row_idx][col_idx]) == number