def dryer_data2(*feature_names):
    """Count languages per area, genus and combination of feature values.

    Only languages that are present in *every* requested feature are
    counted.

    Args:
        *feature_names: Names of the features to load; each one is passed
            to ``Feature(...)``.

    Returns:
        A nested dict ``data[area][genus][feature_values] = language_count``
        where ``feature_values`` is the comma-joined, alphabetically sorted
        list of the ``'description'`` entries found in ``language.features``.
        An empty dict is returned when no feature names are given.
    """
    data = {}
    if not feature_names:
        # Nothing to intersect; avoids indexing into an empty tuple.
        return data

    g = Genealogy()

    # Codes of the languages that all requested features have. Each feature
    # is loaded exactly once.
    languages = None
    for feature_name in feature_names:
        feature = Feature(feature_name)
        this_set = set(language.code for language in feature.languages())
        if languages is None:
            languages = this_set
        else:
            languages &= this_set

    for language_code in languages:
        language = g.find_language_by_code(language_code)
        # NOTE(review): this reads every entry in language.features, which
        # presumably was filled in by the Feature objects built above --
        # confirm that Genealogy() hands back the same language objects.
        # Sort the description strings rather than the value dicts: dicts
        # are not orderable on Python 3.
        descriptions = sorted(
            v['description'] for v in language.features.values()
        )
        value = ','.join(descriptions)

        counts = data.setdefault(language.area, {}).setdefault(
            language.genus.name, {}
        )
        counts[value] = counts.get(value, 0) + 1

    return data
def __init__(self, name):
    """Load the feature called *name* and attach its values to languages.

    The raw per-language values come from ``WALS.get_feature(name)``. Each
    one is translated through ``WALS.FEATURE_VALUES[name]`` and stored on
    the matching language object under ``language.features[name]``.

    Attributes set:
        name: The feature name as given.
        description: ``WALS.FEATURE_MAP[name]``.
        feature_values: ``WALS.FEATURE_VALUES[name]``.
        data: Dict mapping language code to the language object looked up
            through ``Genealogy``.
    """
    genealogy = Genealogy()
    raw_values = WALS.get_feature(name)

    self.name = name
    self.description = WALS.FEATURE_MAP[name]
    self.feature_values = WALS.FEATURE_VALUES[name]
    self.data = {}

    for code, raw_value in raw_values.items():
        lang = genealogy.find_language_by_code(code)
        # Record the translated value on the language object itself, then
        # index that language by its code.
        lang.features[name] = self.feature_values[raw_value]
        self.data[code] = lang