def extract_focus_patients(self, patients_data):
    """Select the patients flagged with diabetes or hypertension.

    A patient qualifies when its "Diabetes" or "Hipertensión" field equals
    the string "true". The selection is written with ``ezcsv.write_dicts`` to
    HAS_DM_OR_HTN_FILENAME under ``INTERMEDIATES_DIR / self.community``, a
    short summary is printed, and the selection is returned.

    Args:
        patients_data: sized iterable of patient dicts carrying the
            "Diabetes" and "Hipertensión" keys.

    Returns:
        A list with the qualifying patient dicts (the same objects, in the
        same order as in ``patients_data``).
    """

    def has_dm_or_htn(patient):
        # Flags are stored as the text "true", not as booleans.
        return patient["Diabetes"] == "true" or patient["Hipertensión"] == "true"

    focus_patients = list(filter(has_dm_or_htn, patients_data))

    target_file = INTERMEDIATES_DIR / self.community / HAS_DM_OR_HTN_FILENAME
    ezcsv.write_dicts(focus_patients, target_file, mkdir=True, silent_fail=True)

    summary = "Out of {} total patients, found {} with DM or HTN.".format(
        len(patients_data), len(focus_patients)
    )
    print(summary)
    print("Writing list of these patients to {}".format(target_file))
    return focus_patients
def extract_col_consults(self, focus_pts):
    """Collect the focus patients' consults that carry HDL information.

    Steps:
      1. Read CONSULTS_CSV from ``INPUT_DIR / self.community`` and keep the
         consults whose "CESid" matches the "CesID" of a focus patient.
         That list is written to CONSULTS_FOR_FOCUS_PTS_FILENAME.
      2. For each kept consult, take the "HDL" and "Colesterol" columns when
         they are non-empty; otherwise fall back to
         ``_extract_hdl_from_note`` / ``_extract_col_from_note``.
      3. Keep every consult whose HDL value is not None, store the resolved
         values back on the consult dict (in place), write the result to
         WITH_HDL_DATA_FILENAME and return it.

    Args:
        focus_pts: iterable of patient dicts with a "CesID" key.

    Returns:
        List of consult dicts (mutated in place) whose HDL value is not None.
    """
    focus_pts_cesids = {p["CesID"] for p in focus_pts}
    consults_data = ezcsv.read_dicts(INPUT_DIR / self.community / CONSULTS_CSV)
    consults_for_focus_pts = [
        c for c in consults_data if c["CESid"] in focus_pts_cesids
    ]
    print()
    print("Found {} consults for those patients.".format(
        len(consults_for_focus_pts)))

    consults_for_focus_pts_file = (INTERMEDIATES_DIR / self.community /
                                   CONSULTS_FOR_FOCUS_PTS_FILENAME)
    print("Writing list of these consults to {}".format(
        consults_for_focus_pts_file))
    ezcsv.write_dicts(
        consults_for_focus_pts,
        consults_for_focus_pts_file,
        mkdir=True,
        silent_fail=True,
    )

    with_hdl_data = []
    for consult in consults_for_focus_pts:
        hdl = (consult["HDL"] if consult["HDL"] != ""
               else _extract_hdl_from_note(consult))
        col = (consult["Colesterol"] if consult["Colesterol"] != ""
               else _extract_col_from_note(consult))
        # Only None disqualifies a consult. A plain truthiness test would
        # also drop an empty-string HDL, which would make the "extracted HDL"
        # count below always equal the total.
        # NOTE(review): presumably _extract_hdl_from_note returns None when
        # the note does not mention HDL and "" when it is mentioned but no
        # value could be parsed -- confirm against the helper.
        if hdl is not None:
            consult["HDL"] = hdl
            consult["Colesterol"] = col
            with_hdl_data.append(consult)

    with_hdl_data_file = (INTERMEDIATES_DIR / self.community /
                          WITH_HDL_DATA_FILENAME)
    hdl_extracted = sum(1 for row in with_hdl_data if row["HDL"] != "")
    # A None cholesterol (no column value and nothing taken from the note)
    # must not be reported as an extracted value.
    col_extracted = sum(
        1 for row in with_hdl_data if row["Colesterol"] not in ("", None))
    print()
    print("Found {} consults mentioning 'hdl'.".format(len(with_hdl_data)))
    print(
        "Of these, extracted HDL for {} of them, and Total Cholesterol for {} of them."
        .format(hdl_extracted, col_extracted))
    print("Writing these consults to {}".format(with_hdl_data_file))
    ezcsv.write_dicts(with_hdl_data,
                      with_hdl_data_file,
                      mkdir=True,
                      silent_fail=True)
    return with_hdl_data
def extract_output_fields(self, patients, consults):
    """Join each consult with its patient and keep only the output columns.

    Every consult yields one row holding the CONSULT_OUTPUT_FIELDS taken from
    the consult followed by the PATIENT_OUTPUT_FIELDS taken from the patient
    whose "CesID" equals the consult's "CESid". The rows are then passed
    through ``self.extract_last_records_per_patient``; its result is written
    to OUTPUT_FILE under ``OUTPUT_DIR / self.community`` and returned.

    Raises:
        KeyError: if a consult refers to a patient not present in
            ``patients``, or a required field is missing.
    """
    patients_index = {}
    for patient in patients:
        patients_index[patient["CesID"]] = patient

    def build_row(consult):
        owner = patients_index[consult["CESid"]]
        row = {field: consult[field] for field in CONSULT_OUTPUT_FIELDS}
        # Patient fields are applied second, so they win on a name clash.
        for field in PATIENT_OUTPUT_FIELDS:
            row[field] = owner[field]
        return row

    merged_rows = [build_row(consult) for consult in consults]
    latest_rows = self.extract_last_records_per_patient(merged_rows)

    destination = OUTPUT_DIR / self.community / OUTPUT_FILE
    print()
    print("Writing output data to {}".format(destination))
    ezcsv.write_dicts(latest_rows, destination, mkdir=True, silent_fail=True)
    return latest_rows
def extract_missing_data_patients(self, patients, consults):
    """List the patients that have no consult in ``consults``.

    A patient is reported when its "CesID" does not appear as the "CESid" of
    any consult. The count is printed and written to OUTPUT_STATS, and the
    patient list is written to OUTPUT_NO_COL_DATA_FILE, both under
    ``OUTPUT_DIR / self.community`` (created if needed).

    Returns:
        The list of patient dicts without a matching consult.
    """
    covered_cesids = {consult["CESid"] for consult in consults}
    uncovered = [
        patient for patient in patients
        if patient["CesID"] not in covered_cesids
    ]
    uncovered_count = len(uncovered)

    print()
    print("Found {} patients with DM or HTN but no Cholesterol data".format(
        uncovered_count))

    community_dir = OUTPUT_DIR / self.community
    community_dir.mkdir(parents=True, exist_ok=True)

    # Opening with "w" replaces any earlier stats file with this single line.
    with open(community_dir / OUTPUT_STATS, "w", encoding="utf-8") as stats:
        stats.write(
            "Patients with DM or HTN but no cholesterol data: {}\n".format(
                uncovered_count))

    uncovered_file = community_dir / OUTPUT_NO_COL_DATA_FILE
    print("Writing this list of patients to {}".format(uncovered_file))
    ezcsv.write_dicts(uncovered, uncovered_file, mkdir=True, silent_fail=True)
    return uncovered
def do_refs(censo_parto, raw_refs_data, communities):
    """Match the censo/parto rows against the refs data and write the result.

    Rows come from ``match`` plus ``get_by_community``. Each row is laid over
    the ADDITIONAL_COLUMNS defaults (row values win) and written to
    OUTPUT_PATH. The column list is the sorted union of the first censo/parto
    row's keys, the first refs row's keys prefixed with ``PREFIXES[REFS]``,
    and the ADDITIONAL_COLUMNS keys.

    Raises:
        IndexError: if ``censo_parto`` or ``raw_refs_data`` is empty, since
            the column names are read from their first rows.
    """
    print()
    print("Computing refs matches")

    direct_matches = match(censo_parto, raw_refs_data, OUTPUT, REFS)
    community_matches = get_by_community(direct_matches, raw_refs_data, REFS,
                                         communities)
    combined_rows = direct_matches + community_matches

    column_names = set(censo_parto[0].keys())
    column_names.update(
        PREFIXES[REFS] + key for key in raw_refs_data[0].keys())
    column_names.update(ADDITIONAL_COLUMNS.keys())

    # ADDITIONAL_COLUMNS supplies defaults; the row's own values override.
    rows_with_defaults = [
        dict(ADDITIONAL_COLUMNS, **row) for row in combined_rows
    ]
    ezcsv.write_dicts(rows_with_defaults,
                      OUTPUT_PATH,
                      mkdir=True,
                      fieldnames=sorted(column_names))
def do_partos(raw_censo_data, raw_partos_data, communities):
    """Match the censo rows against the partos data, write and return them.

    Rows come from ``match`` plus ``get_by_community`` and are written to
    CENSO_WITH_PARTO_LINES. The column list is the sorted union of the first
    censo row's keys prefixed with ``PREFIXES[CENSO]``, the first partos
    row's keys prefixed with ``PREFIXES[PARTOS]``, and the ADDITIONAL_COLUMNS
    keys.

    Returns:
        The combined list of matched rows.

    Raises:
        IndexError: if ``raw_censo_data`` or ``raw_partos_data`` is empty,
            since the column names are read from their first rows.
    """
    print("Computing parto matches")

    direct_matches = match(raw_censo_data, raw_partos_data, CENSO, PARTOS)
    community_matches = get_by_community(direct_matches, raw_partos_data,
                                         PARTOS, communities)
    combined_rows = direct_matches + community_matches

    column_names = set()
    column_names.update(
        PREFIXES[CENSO] + key for key in raw_censo_data[0].keys())
    column_names.update(
        PREFIXES[PARTOS] + key for key in raw_partos_data[0].keys())
    column_names.update(ADDITIONAL_COLUMNS.keys())

    ezcsv.write_dicts(combined_rows,
                      CENSO_WITH_PARTO_LINES,
                      mkdir=True,
                      fieldnames=sorted(column_names))
    return combined_rows
def main(community):
    """Add a 10-year ASCVD score to each patient row of one community.

    Reads ASCVD_INPUT_FILENAME from ``OUTPUT_DIR / community``, calls
    ``ascvd.compute_ten_year_score`` for every row, stores the result under
    the "ASCVD 10 year" key, and writes all rows to OUTPUT_FILENAME in the
    same directory. Rows for which the computation raises ValueError are
    written without a score.

    Args:
        community: name of the community sub-directory of OUTPUT_DIR.
    """
    community_dir = OUTPUT_DIR / community
    patients = ezcsv.read_dicts(community_dir / ASCVD_INPUT_FILENAME)

    scored_patients = []
    for patient in patients:
        try:
            is_male = patient["Sexo"] == "1"
            has_hypertension = patient["Hipertensión"] == "true"
            has_diabetes = patient["Diabetes"] == "true"
            # Age is approximated from the birth year only.
            age_years = datetime.now().year - int(patient["FN_Ano"])
            systolic = int(patient["PA Sistólica"])
            total_cholesterol = int(patient["Colesterol"])
            hdl_value = int(patient["HDL"])
            risk = ascvd.compute_ten_year_score(
                isMale=is_male,
                isBlack=False,
                smoker=False,
                hypertensive=has_hypertension,
                diabetic=has_diabetes,
                age=age_years,
                systolicBloodPressure=systolic,
                totalCholesterol=total_cholesterol,
                hdl=hdl_value,
            )
        except ValueError:
            # Expected for patients who are missing some data: the row is
            # kept, just without a score.
            pass
        else:
            patient["ASCVD 10 year"] = risk
        scored_patients.append(patient)

    ezcsv.write_dicts(scored_patients, community_dir / OUTPUT_FILENAME)