"""Build the tidy USDA food catalog CSV from the raw food table."""
import pandas as pd

from catalog_preparation.catalog_creation_helper import (
    all_cases_of_cases,
    remove_literally,
)

# Paths are relative to the working directory the script is launched from.
food_file_path = '../data/food/food.tsv'
output_csv_path = '../data/food/food_usda_tidy.csv'


def main():
    """Read the raw food table, expand it, and write the de-duplicated CSV.

    Steps:
      1. Load ``food_file_path`` as a ';'-separated, cp1252-encoded table
         with the columns ``group`` and ``name``.
      2. Pass the frame through ``remove_literally`` with ``['pie']``.
      3. Outer-merge the frame with the result of ``all_cases_of_cases`` on
         every column, then drop duplicate rows.
      4. Write the result to ``output_csv_path`` without the index column.
    """
    # NOTE(review): header=1 combined with explicit names means the rows up to
    # and including row 1 are not treated as data — confirm the input file
    # really has two leading non-data rows.
    food_data = pd.read_table(
        food_file_path,
        sep=';',
        encoding='cp1252',
        header=1,
        names=['group', 'name'],
    )

    # Presumably filters out entries matching the literal 'pie'; the helper is
    # defined elsewhere — verify against its implementation.
    food_data = remove_literally(food_data, ['pie'])

    # Presumably generates letter-case variants of the entries — TODO confirm.
    food_data_cases = all_cases_of_cases(food_data)

    # Merging on all columns with how='outer' keeps the rows of both frames;
    # exact duplicates are removed right after.
    merge_columns = list(food_data)
    food_data = pd.merge(food_data, food_data_cases, on=merge_columns, how='outer')
    food_data = food_data.drop_duplicates()

    food_data.to_csv(output_csv_path, index=False)


if __name__ == '__main__':
    main()
# NOTE(review): the line below appears to have lost its line breaks and
# indentation. It starts partway through a function whose header is not
# visible here (the fragment ends at `return disease_data`), and is followed
# by two module-level path constants and a `__main__` script that calls
# parse_obo(obo_path), remove_literally, all_cases_of_cases, pd.merge,
# drop_duplicates and to_csv (tab-separated, no index).
# As written on a single line, everything after the first `#` is a comment,
# so the original line structure must be restored before this can run.
# NOTE(review): the bare `except:` falls back to groups = ['NA'] on any
# error, not only on a missing 'is_a' tag — consider narrowing it.
# NOTE(review): if disease_data is a pandas DataFrame, `.append` was removed
# in pandas 2.0 — confirm the pinned pandas version.
synonyms = [] try: groups = [group.value for group in stanza.tags['is_a']] except: groups = ['NA'] names = [name] + synonyms # apostrophe1_names = [name.replace('\'', '’') for name in names if '\'' in name] # apostrophe2_names = [name.replace('’', '\'') for name in names if '’' in name] names = [re.sub('[’\']', '', name) for name in names] for disease_name in names: for group in groups: entry = {'id': disease_id, 'name': disease_name, 'group': group, 'obsolete': is_obsolete} disease_data = disease_data.append(entry, ignore_index=True) return disease_data obo_path = "../data/diseases/doid.obo" output_path = '../data/diseases/diseases_catalog.csv' if __name__ == '__main__': disease_data = parse_obo(obo_path) disease_data = remove_literally(disease_data, ['disease']) disease_data_cases = all_cases_of_cases(disease_data) disease_data = pd.merge(disease_data, disease_data_cases, on=list(disease_data), how='outer') disease_data = disease_data.drop_duplicates() disease_data.to_csv(output_path, index=False, sep='\t')