def test_synonyms_count_none():
    """Check the no-argument form of ``_synonyms_count``.

    Called without a word, ``_synonyms_count`` returns a sized, iterable
    collection of lemmas. This test pins the expected number of entries and
    the casing of every lemma in it.
    """
    # Caches are dropped before the checks (presumably so the values are
    # computed fresh rather than read from an earlier test's cache).
    drop_caches()
    synonyms_count = SubstitutionFeaturesMixin._synonyms_count

    # Lemmas are properly counted.
    assert len(synonyms_count()) == 147306

    # Lemmas are all lowercase; the only tolerated exceptions are lemmas
    # whose first or last character passes `is_int`.
    for lemma in synonyms_count():
        assert lemma.islower() or is_int(lemma[0]) or is_int(lemma[-1])
def test_synonyms_count():
    """Check ``_synonyms_count`` for individual words.

    Covers words with a well-defined synonym count as well as words for
    which the result is expected to be ``np.nan``.
    """
    # Caches are dropped before the checks (presumably so the values are
    # computed fresh rather than read from an earlier test's cache).
    drop_caches()
    synonyms_count = SubstitutionFeaturesMixin._synonyms_count

    expected_counts = (
        # 'hello' has a single synset, with 5 members. So 4 synonyms.
        ("hello", 4),
        # 'mountain' has two synsets, with 2 and 27 members.
        # So ((2-1) + (27-1))/2 synonyms.
        ("mountain", 13.5),
    )
    for word, expected in expected_counts:
        assert synonyms_count(word) == expected

    undefined_words = (
        # 'lamp' has two synsets, with only one member in each.
        # So no synonyms, which yields `np.nan`.
        "lamp",
        # 'makakiki' does not exist.
        "makakiki",
    )
    for word in undefined_words:
        assert np.isnan(synonyms_count(word))