"number of feature levels of years and nid should match number"
# NOTE: the string literal above is the message of an assert statement that
# starts before this section; it is kept verbatim.

# --- Sanity checks on the aggregated data before it is written ------------
# The number of distinct age_start values must equal the number of distinct
# age_end values.
assert len(df_agg['age_start'].unique()) == len(df_agg['age_end'].unique()), (
    "number of feature levels age start should match number of feature "
    "levels age end"
)
assert len(df_agg['diagnosis_id'].unique()) <= 2, (
    "diagnosis_id should have 2 or less feature levels"
)
assert len(df_agg['code_system_id'].unique()) <= 2, (
    "code_system_id should have 2 or less feature levels"
)
assert len(df_agg['source'].unique()) == 1, (
    "source should only have one feature level"
)
# Compare totals at 3 decimal places so floating point noise does not trip
# the check; val_sum is computed earlier in the script.
assert round(val_sum, 3) == round(df_agg.val.sum(), 3), (
    "some cases were lost"
)

# --- Regression test against a previously stored data set -----------------
fpath = r"FILEPATH"
compare_df = pd.read_hdf(fpath)
test_results = stage_hosp_prep.test_case_counts(df_agg, compare_df)
# test_case_counts returns this exact string on success; anything else is
# treated as a collection of failure messages (presumably a list of str --
# TODO confirm against stage_hosp_prep).
if test_results != "test_case_counts has passed!!":
    msg = " --- ".join(test_results)
    # Raise explicitly instead of `assert False, msg`: assert statements are
    # removed under `python -O`, which would let a failed comparison fall
    # through to the write below.
    raise AssertionError(msg)

# --- Write the validated data ----------------------------------------------
write_path = root + r"FILENAME" + r"FILEPATH"
write_hosp_file(df_agg, write_path, backup=True)
# --- Sanity checks on the aggregated data before it is written ------------
assert len(df_agg['sex_id'].unique()) == 2, (
    "There should only be two feature levels to sex_id"
)
assert len(df_agg['code_system_id'].unique()) <= 2, (
    "code_system_id should have 2 or less feature levels"
)
assert len(df_agg['source'].unique()) == 1, (
    "source should only have one feature level"
)
# NOTE(review): this check reads `df`, while every other check here reads
# `df_agg` -- confirm that the un-aggregated frame is the intended target.
assert (df.val >= 0).all(), (
    "for some reason there are negative case counts"
)

# --- Regression test against a previously stored data set -----------------
# Rows for these location ids are removed from both sides of the comparison.
updated_locs = [4761, 4750, 4760]
test_df = df_agg[~df_agg.location_id.isin(updated_locs)]
compare_df = pd.read_hdf("FILEPATH")
compare_df = compare_df[~compare_df.location_id.isin(updated_locs)]

test_results = stage_hosp_prep.test_case_counts(test_df, compare_df)
# test_case_counts returns this exact string on success; anything else is
# treated as a collection of failure messages (presumably a list of str --
# TODO confirm against stage_hosp_prep).
if test_results != "test_case_counts has passed!!":
    msg = " --- ".join(test_results)
    # Raise explicitly instead of `assert False, msg`: assert statements are
    # removed under `python -O`, which would let a failed comparison fall
    # through to the write below.
    raise AssertionError(msg)

# --- Write the validated data ----------------------------------------------
# The full df_agg (including the updated locations) is written, not test_df.
write_path = root + r"FILENAME" "FILEPATH"
write_hosp_file(df_agg, write_path, backup=True)