def attach_nid(df):
    """
    Add merged (private + medicare supplemental insurance) NIDs onto the
    marketscan data.

    Parameters:
        df: DataFrame of marketscan data. It is handed to
            ``hosp_prep.fill_nid`` together with the lookup defined below.

    Returns:
        The DataFrame returned by ``hosp_prep.fill_nid``; it must contain an
        'nid' column, which is validated before returning.

    Raises:
        AssertionError: if any row has an 'nid' equal to 0 after filling.
    """
    # Lookup passed to hosp_prep.fill_nid.
    # NOTE(review): keys appear to be data years and values the merged NID
    # for that year -- confirm against hosp_prep.fill_nid.
    nid_dict = {
        2000: 244369,
        2010: 244370,
        2011: 336850,
        2012: 244371,
        2013: 336849,
        2014: 336848,
        2015: 336847,
        2016: 408680,
    }
    df = hosp_prep.fill_nid(df, nid_dict)

    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would silently disable this
    # data check. AssertionError is kept so existing callers see the same
    # exception type and message.
    # NOTE(review): only a value of 0 is treated as "missing" here; null
    # NIDs would pass this check -- confirm how fill_nid marks unmatched rows.
    if (df['nid'] == 0).any():
        raise AssertionError("There are missing NIDs")
    return df
2001: 87001, 2002: 87002, 2003: 87003, 2004: 87004, 2005: 87005, 2006: 87006, 2007: 87007, 2008: 87008, 2009: 87009, 2010: 87010, 2011: 87011, 2012: 114876, 2013: 160484, 2014: 237756 } df = hosp_prep.fill_nid(df, nid_dictionary) fix_sex_dict = {'Mujeres': 2, 'Mujer': 2, 'Hombres': 1, 'Hombre': 1} df['sex_id'].replace(fix_sex_dict, inplace=True) df['sex_id'] = pd.to_numeric(df['sex_id'], downcast='integer', errors='raise') df.loc[(df['sex_id'] != 1) & (df['sex_id'] != 2), 'sex_id'] = 3 assert set(df.sex_id.unique()).issubset({1, 2, 3}) age_unit_translation_dict = { "A\xf1os (1 a 98 a\xf1os de edad)": "Years", "Meses (1 a 11 meses de edad)": "Months", 'D\xedas (1 a 29 d\xedas de edad)': 'Days', 'Ignorado': 'Unknown', 'Anios (1 a 115 anios de edad)': "Years",
# Lookup passed to hosp_prep.fill_nid.
# NOTE(review): keys appear to be data years and values the NID for that
# year -- confirm against hosp_prep.fill_nid.
nid_dict = {
    2004: 86953,
    2005: 86954,
    2006: 86955,
    2007: 86956,
    2008: 86957,
    2009: 86958,
    2010: 94170,
    2011: 94171,
    2012: 121282,
    2013: 150449,
    2014: 220205,
    2015: 281773,
}
df = hosp_prep.fill_nid(df, nid_dict)

# Keep only rows with a positive `los`. A comparison against a null value
# evaluates to False, so rows with a missing `los` are dropped by this filter
# as well as rows where it is zero or negative.
df = df[df['los'] > 0]

# Row count after the `los` filter.
final_admits = len(df)

# Every remaining row gets the same facility label.
df['facility_id'] = 'hospital'

df = hosp_prep.age_binning(df, drop_age=True)

# NOTE(review): presumably the columns to be cast to integer further down --
# confirm against the code that consumes int_cols.
int_cols = [
    'location_id',
    'year_start',
    'year_end',
    'age_group_unit',
    'age_start',
    'age_end',
    'sex_id',
    'nid',
    'representative_id',
    'metric_id',
]