def test_derived_terms_pitaa():
    """Check that the "pitää" page yields between 23 and 27 heads tagged "deriv"."""
    _defns, heads = parse_enwiktionary_page("pitää", read_data("pitaa"))
    deriv_count = sum(1 for head in heads if head["tag"] == "deriv")
    assert 23 <= deriv_count <= 27
def test_parse_min_results(entry):
    """
    Smoke test: parsing ``entry`` must produce at least ``MIN_LENGTHS[entry]``
    senses, counted after flattening with ``flat_roundtrip_senses``.
    """
    defns, _heads = parse_enwiktionary_page(entry, read_data(entry))
    flattened = flat_roundtrip_senses(defns)
    actual = len(flattened)
    required = MIN_LENGTHS[entry]
    failure_msg = "Needed {} senses for {} but got {}".format(
        required, entry, actual
    )
    assert actual >= required, failure_msg
def test_compound_fi(compound, subwords):
    """
    Check the etymology heading parsed for a compound word.

    Exactly one head tagged "etymology-heading" is expected. It must have no
    ``ety_idx``, a single etymology, and one bit per entry in ``subwords``;
    each bit's headword must match the corresponding ``subwords`` regex
    (anchored at the start, as with ``re.match``).
    """
    _defns, heads = parse_enwiktionary_page(compound, read_data(compound))
    matched_headings = 0
    for head in heads:
        if head["tag"] == "etymology-heading":
            assert head["ety_idx"] is None
            etymologies = head["etys"]
            assert len(etymologies) == 1
            bits = etymologies[0]["bits"]
            assert len(bits) == len(subwords)
            for bit, pattern in zip(bits, subwords):
                assert re.match(pattern, bit["headword"])
            matched_headings += 1
    assert matched_headings == 1
def test_parse_no_exceptions(entry):
    """Smoke test: parsing ``entry`` with ``skip_ety=True`` must not raise."""
    page_text = read_data(entry)
    parse_enwiktionary_page(entry, page_text, skip_ety=True)
def test_gram_note_has_formatting():
    """
    Parse the ``THING`` fixture and check the first noun's cleaned definition
    keeps "thing" while not containing "elative".
    """
    defns, _heads = parse_enwiktionary_page("test", THING)
    cleaned = defns["Noun"][0].cleaned_defn
    assert "thing" in cleaned
    assert "elative" not in cleaned
def test_maki_not_gram_note():
    """Check the first noun sense of "maki" retains its full gloss text."""
    defns, _heads = parse_enwiktionary_page("maki", read_data("maki"))
    expected_gloss = "a relatively large, usually rounded elevation of earth"
    first_noun = defns["Noun"][0]
    assert expected_gloss in first_noun.cleaned_defn
def test_saattaa():
    """
    Check the first subsense of the fourth verb sense of "saattaa" contains
    both expected gloss fragments.
    """
    defns, _heads = parse_enwiktionary_page("saattaa", read_data("saattaa"))
    fourth_verb = defns["Verb"][3]
    subsense_text = fourth_verb.subsenses[0].cleaned_defn
    for fragment in ("might", "do, probably do"):
        assert fragment in subsense_text
def test_pitaa_gram_rm():
    """
    Check the third verb sense of "pitaa" mentions "like" and that "elative"
    does not appear in its cleaned definition.
    """
    defns, _heads = parse_enwiktionary_page("pitaa", read_data("pitaa"))
    cleaned = defns["Verb"][2].cleaned_defn
    assert "like" in cleaned
    assert "elative" not in cleaned
def test_vuotta_head_gram():
    """
    Check the ``morph`` case recorded on the first noun of each etymology
    section of "vuotta": abessive for Etymology 1, partitive for Etymology 2.
    """
    defns, _heads = parse_enwiktionary_page("vuotta", read_data("vuotta"))
    expected_cases = (
        ("Etymology 1", "abessive"),
        ("Etymology 2", "partitive"),
    )
    for section, case in expected_cases:
        morph = defns[section]["Noun"][0].morph
        assert morph and morph["case"] == case
def parse_file(filename):
    """
    Parse a Wiktionary page from an open file and pretty-print the result.

    ``filename`` must be a readable file-like object: ``.read()`` is called
    on it, so despite the name it is not a path string.

    Prints "No definitions found" and returns when the parse yields no
    definitions; otherwise pretty-prints whatever the parser returned.
    """
    # NOTE(review): the file object itself is passed as the first argument,
    # where the other callers in this file pass the entry's headword string.
    # Confirm this is intended.
    result = parse_enwiktionary_page(filename, filename.read())
    # The other callers unpack the return value as ``(defns, heads)``, so a
    # bare ``is None`` check on the whole result can never fire for a tuple.
    # Look at the definitions element instead, while still tolerating a
    # non-tuple (e.g. ``None``) return.
    defns = result[0] if isinstance(result, tuple) else result
    if not defns:
        print("No definitions found")
        # Stop here so the message is not followed by a dump of an empty result.
        return
    pprint(result)