def test_orthographic_density_none():
    """Check `_orthographic_density()` when it is called without a word.

    Two things are verified: every item yielded by the no-argument call is
    lowercase, and, once the CLEARPOND file is overridden with two
    hand-written rows, the no-argument call yields exactly the words found
    in those rows.
    """
    drop_caches()

    # Lemmas are all lowercase: gather offenders so a failure names them.
    not_lowercase = [
        lemma
        for lemma in SubstitutionFeaturesMixin._orthographic_density()
        if not lemma.islower()
    ]
    assert not_lowercase == []

    # And it's computed right. Start again from empty caches so the
    # overridden file is what gets read.
    drop_caches()

    # One tab-separated row per word: the word, then "2" five tabs later,
    # then "3" twenty-four tabs after that.
    # NOTE(review): presumably this mirrors the column layout of the real
    # CLEARPOND file -- confirm against the loader.
    fake_rows = [
        lemma + 5 * "\t" + "2" + 24 * "\t" + "3"
        for lemma in ("dog", "cat")
    ]

    with settings.file_override("CLEARPOND"):
        with open(settings.CLEARPOND, "w") as clearpond_file:
            # Rows are newline-separated, with no trailing newline.
            clearpond_file.write("\n".join(fake_rows))

        # The file is closed (and therefore flushed) at this point, but the
        # override is still in effect.
        known_words = set(SubstitutionFeaturesMixin._orthographic_density())
        assert known_words == {"dog", "cat"}
def test_orthographic_density():
    """Check `_orthographic_density(word)` for two specific words.

    The value for "time" must equal 13, and the value for "wickiup" must
    be NaN.
    """
    drop_caches()

    density_of = SubstitutionFeaturesMixin._orthographic_density

    # "time" has a definite expected value.
    time_density = density_of("time")
    assert time_density == 13

    # "wickiup" is expected to yield NaN.
    wickiup_density = density_of("wickiup")
    assert np.isnan(wickiup_density)