def map_episode_unit_type(unit_code, start_time=None):
    """Translate a raw unit code into a normalized unit-type label.

    Rules are applied in this order:

    1. When ``start_time`` is given and the code contains ``'8NC'``, the
       label depends on ``get_hours_between_datetimes('2020-04-08 00:00:00',
       start_time)``: a positive value gives ``'intensive_care_unit'``,
       anything else gives ``'coronary_care_unit'``.
    2. A code containing ``'ER'`` gives ``'emergency_room'``.
    3. Otherwise the stripped code is looked up in ``UNIT_TYPE_MAP``.

    Args:
        unit_code: Raw unit code; converted with ``str()`` and stripped.
        start_time: Optional episode start time. When ``None`` the ``'8NC'``
            rule is skipped entirely.

    Returns:
        The mapped unit-type string, or ``''`` (after printing a warning)
        when the code is not recognized.
    """
    code = str(unit_code).strip()

    if start_time is not None:
        # NOTE(review): presumably unit 8NC changed role on 2020-04-08 and the
        # helper returns (second - first) in hours -- confirm with the helper.
        hours_from_cutoff = get_hours_between_datetimes(
            '2020-04-08 00:00:00', start_time)
        if '8NC' in code:
            return ('intensive_care_unit' if hours_from_cutoff > 0
                    else 'coronary_care_unit')

    if 'ER' in code:
        return 'emergency_room'

    if code in UNIT_TYPE_MAP:
        return UNIT_TYPE_MAP[code]

    print('Unrecognized unit type: ' + code)
    return ''
def is_between_datetimes(dt, d1, d2):
    """Report whether ``dt`` lies strictly inside the interval (``d1``, ``d2``).

    Both bounds are tested with ``get_hours_between_datetimes``; the result is
    true only when the value for ``(d1, dt)`` and the value for ``(dt, d2)``
    are both greater than zero.

    Args:
        dt: The datetime being tested.
        d1: Lower bound of the interval.
        d2: Upper bound of the interval. It is passed with ``default_now=True``
            -- presumably so a missing upper bound is treated as "now"; confirm
            against the helper.

    Returns:
        The truth value of "after ``d1`` and before ``d2``".
    """
    hours_since_lower = get_hours_between_datetimes(d1, dt)
    hours_until_upper = get_hours_between_datetimes(dt, d2, default_now=True)

    past_lower = hours_since_lower > 0
    before_upper = hours_until_upper > 0
    return past_lower and before_upper
if lab_name == 'eosinophil_count': continue if lab_name in ['red_blood_cell_count', 'mean_corpuscular_volume', 'mean_corpuscular_hemoglobin', 'mean_corpuscular_hemoglobin_concentration']: continue if patient_id not in lab_bins: lab_bins[patient_id] = {} if lab_name not in lab_names: lab_names.append(lab_name) if lab_name not in patients_with_data: patients_with_data[lab_name] = set() if lab_name not in lab_bins[patient_id]: lab_bins[patient_id][lab_name] = [None for x in range(0,n_time_points)] episode_start_time = eligible_episodes[patient_id][0] hours_since_admission = get_hours_between_datetimes(episode_start_time, lab_sample_time) if hours_since_admission > left_limit and hours_since_admission < right_limit: bin_num = int(hours_since_admission / hours_per_period) + int(left_offset / hours_per_period) if bin_num >= n_time_points: continue if lab_name in lab_bins[patient_id] and \ bin_num < len(lab_bins[patient_id][lab_name])-1 and \ lab_bins[patient_id][lab_name][bin_num] is not None: # Pick the most abnormal (simplified here to highest) if lab_value > lab_bins[patient_id][lab_name][bin_num]: lab_bins[patient_id][lab_name][bin_num] = lab_value else: lab_bins[patient_id][lab_name][bin_num] = lab_value patients_with_data[lab_name].add(patient_id) total_lab_num += 1 total_entries_num += 1 patient_ids.append(patient_id)
lab_result_units = row.resultunit if lab_name.lower() in LAB_SKIP_VALUES or \ lab_result_string.lower() in LAB_SKIP_VALUES: continue if lab_result_string in LAB_CANCELLED_FLAGS: lab_result_status = 'cancelled' lab_result_string = '' elif 'attente' in lab_result_string: lab_result_status = 'pending' lab_result_string = '' else: lab_result_status = 'resulted' delta_hours = get_hours_between_datetimes( pcr_sample_times[str(patient_mrn)], str(lab_sample_time)) if delta_hours > -48 and delta_hours < 7 * 24: mapped_lab_name = map_lab_name(lab_name) mapped_lab_sample_site = map_lab_sample_site(lab_name, lab_sample_site) try: mapped_lab_value = map_lab_result_value(lab_result_string) except: print('Invalid lab value: ' + str(lab_result_string)) continue if mapped_lab_name == 'venous_o2_sat': print(mapped_lab_value)
df = sql_query("SELECT * FROM dw_v01.dw_rad_examen "+ "WHERE dossier IN ('S" + "', 'S".join(patient_mrns) + "') " + "AND date_heure_exam > '2020-01-01'") imaging_data_rows = [] patients_with_imaging = [] imaging_accession_numbers = [] for index, row in df.iterrows():[] lower_desc = row.description.lower() row_patient_mrn = str(row.dossier)[1:] if ('rx' in lower_desc and 'poumon' in lower_desc) or \ ('scan' in lower_desc and 'thorax' in lower_desc): #('scan' in lower_desc and 'abdo' in lower_desc): hours = get_hours_between_datetimes( pcr_sample_times[row_patient_mrn], row.date_heure_exam) if hours < -48: continue patients_with_imaging.append(row_patient_mrn) imaging_accession_numbers.append(row.accession_number) imaging_accession_uid = generate_accession_uid(row.accession_number) imaging_acquired_time = row.date_heure_debut_examen if 'rx' in lower_desc: modality = 'xr' elif 'scan' in lower_desc: modality = 'ct' imaging_data_rows.append([ row_patient_mrn, imaging_accession_uid,
# Find the last episode for each patient to attribute death for row in episode_data_rows: patient_mrn = str(row[0]) episode_id = str(row[1]) episode_start_time = str(row[3]) episode = { 'episode_id': episode_id, 'episode_start_time': episode_start_time } delta = 1 if patient_mrn in last_episode_by_dossier: delta = get_hours_between_datetimes( last_episode_by_dossier[patient_mrn]['episode_start_time'], episode_start_time ) if delta > 0: last_episode_by_dossier[patient_mrn] = episode episode_ids.append(episode_id) episodes_by_id[episode_id] = episode df = sql_query("SELECT DISTINCT * FROM dw_test.orcl_cichum_bendeces_live WHERE " + \ "dossier in ('" + "', '".join(patient_mrns) + "') " + \ "AND dhredeces > '2020-01-01'") for index, row in df.iterrows(): patient_mrn = str(row.dossier)
# Register every patient's MRN and remember the value stored at index 2 of the
# patient row as that patient's PCR sample time.
for patient_row in patient_data_rows:
    mrn = str(patient_row[0])
    patient_mrns.append(mrn)
    pcr_sample_times[mrn] = patient_row[2]

# Pull the BiPAP records that started after 2020-01-01 for those patients.
# The MRNs are inserted unquoted into the IN (...) list.
mrn_in_list = ", ".join(patient_mrns)
df = sql_query(
    "SELECT * from dw_test.orcl_hev_bipap_live WHERE "
    "start_dtm > '2020-01-01' AND dossier in (" + mrn_in_list + ")"
)

intervention_data_rows = []

for _, bipap_row in df.iterrows():
    # The dossier is normalized through int() before being used as a key.
    mrn = str(int(bipap_row.dossier))
    started_at = str(bipap_row.start_dtm)
    ended_at = str(bipap_row.end_dtm)

    hours_from_pcr = get_hours_between_datetimes(
        pcr_sample_times[mrn], started_at)

    # Skip interventions whose offset from the PCR sample is below -48 hours.
    if hours_from_pcr < -48:
        continue

    # Every BiPAP record is exported as a 'mechanical_ventilation' row.
    intervention_data_rows.append(
        [mrn, 'mechanical_ventilation', started_at, ended_at])

print('Total rows: %d' % len(intervention_data_rows))

intervention_csv_path = os.path.join(CSV_DIRECTORY, 'intervention_data.csv')
write_csv(TABLE_COLUMNS['intervention_data'],
          intervention_data_rows,
          intervention_csv_path)
# Fetch the emergency-room stays (admitted after 2020-01-01) of the selected
# patients.
mrn_in_list = "', '".join(patient_mrns)
df = sql_query(
    "SELECT dossier, noadm, dhreadm, dhredep, diagdesc FROM "
    "dw_test.orcl_cichum_sejurg_live WHERE "
    "dossier in ('" + mrn_in_list + "') "
    "AND dhreadm > '2020-01-01'"
)

# Build one episode row per stay that passes the filter below.
episode_data_rows = []
for _, stay in df.iterrows():
    # A stay is kept only when it has an admission time, the patient has a
    # non-empty PCR sample time, and the helper reports a positive number of
    # hours for (admission, PCR sample). The checks short-circuit in this
    # order, so the PCR lookup is skipped for stays without an admission time.
    # NOTE(review): pcr_sample_times values are indexed with [0] here --
    # presumably a sequence whose first item is the sample time; confirm.
    keep_stay = (
        stay.dhreadm != '' and
        pcr_sample_times[str(stay.dossier)][0] != '' and
        get_hours_between_datetimes(
            stay.dhreadm, pcr_sample_times[str(stay.dossier)][0]
        ) > 0
    )
    if not keep_stay:
        continue

    episode_data_rows.append([
        map_string_lower(stay.dossier),
        map_string_lower(int(stay.noadm)),
        map_episode_unit_type('ER', None),
        map_time(stay.dhreadm),
        map_time(stay.dhredep),
        map_string_lower(stay.diagdesc),
        # Length of stay in hours; default_now=True is forwarded to the helper.
        get_hours_between_datetimes(
            stay.dhreadm, stay.dhredep, default_now=True),
    ])

# NOTE(review): despite its name, this holds one single-item list containing
# the lower-cased dossier for every fetched stay (filtered or not).
episode_ids = []
for _, stay in df.iterrows():
    episode_ids.append([map_string_lower(stay.dossier)])

# Get inpatient location data from ADT
#
def _covid_positive_lab_query(lab_name, sample_site=None):
    """Build the SELECT for resulted labs of COVID-positive patients.

    Only called with literal lab names / sample sites from this module, so
    the values are concatenated directly into the statement.
    """
    site_clause = ""
    if sample_site is not None:
        site_clause = " lab_data.lab_sample_site = '" + sample_site + "' AND "
    return (
        "SELECT patient_data.patient_site_uid, "
        " lab_sample_time, lab_result_value from lab_data "
        " INNER JOIN patient_data ON "
        " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
        " lab_data.lab_name = '" + lab_name + "' AND "
        + site_clause +
        " lab_data.lab_result_status = 'resulted' AND "
        " patient_data.patient_covid_status = 'positive'"
    )


def correlate_labs(conn):
    """Plot D-dimer against the venous-arterial pCO2 difference.

    Steps:

    1. Fetch resulted arterial pCO2, venous pCO2 and D-dimer rows of
       COVID-positive patients as ``(patient_site_uid, lab_sample_time,
       lab_result_value)`` tuples.
    2. Pair each arterial sample with every venous sample of the same patient
       taken within 2 hours of it, keeping only pairs whose venous value is
       not lower than the arterial value.
    3. Match each D-dimer result with the same patient's pairs when either
       sample of the pair lies within 24 hours of the D-dimer sample.
    4. Print the number of matched points and of distinct patients, then
       show a scatter plot with a first-degree least-squares fit.

    Args:
        conn: Database connection handed to ``sql_fetch_all``.

    Returns:
        None. When fewer than two points are matched, a message is printed
        and no plot is drawn.
    """
    arterial_pco2s = sql_fetch_all(
        conn, _covid_positive_lab_query('pco2', 'arterial_blood'))
    venous_pco2s = sql_fetch_all(
        conn, _covid_positive_lab_query('pco2', 'venous_blood'))
    d_dimers = sql_fetch_all(conn, _covid_positive_lab_query('d_dimer'))

    # Index venous samples by patient so each arterial sample is compared
    # only with the same patient's samples instead of the whole result set.
    venous_by_patient = {}
    for venous_pco2 in venous_pco2s:
        venous_by_patient.setdefault(venous_pco2[0], []).append(venous_pco2)

    pairs_by_patient = {}
    for arterial_pco2 in arterial_pco2s:
        patient_id, arterial_time, arterial_value = arterial_pco2
        for venous_pco2 in venous_by_patient.get(patient_id, []):
            _, venous_time, venous_value = venous_pco2
            # Negative differences are discarded.
            if venous_value - arterial_value < 0:
                continue
            # The window is symmetric: the sign of the helper's result must
            # not decide whether far-apart samples are paired.
            hours_apart = get_hours_between_datetimes(
                arterial_time, venous_time)
            if abs(hours_apart) < 2:
                pairs_by_patient.setdefault(patient_id, []).append(
                    (arterial_pco2, venous_pco2))

    x = []
    y = []
    ids = []
    for patient_id, d_dimer_time, d_dimer_value in d_dimers:
        for arterial_pco2, venous_pco2 in pairs_by_patient.get(patient_id, []):
            _, arterial_time, arterial_value = arterial_pco2
            _, venous_time, venous_value = venous_pco2
            hours_to_arterial = get_hours_between_datetimes(
                d_dimer_time, arterial_time)
            hours_to_venous = get_hours_between_datetimes(
                d_dimer_time, venous_time)
            if abs(hours_to_arterial) < 24 or abs(hours_to_venous) < 24:
                x.append(d_dimer_value)
                # Use this pair's own difference, not a value left over from
                # the pairing loop above.
                y.append(venous_value - arterial_value)
                ids.append(patient_id)

    print(len(x))
    print(len(np.unique(ids)))

    # A straight-line fit needs at least two points.
    if len(x) < 2:
        print('Not enough matched samples to fit a trend line')
        return

    x = np.asarray(x)
    y = np.asarray(y)
    trend = np.poly1d(np.polyfit(x, y, 1))

    plt.scatter(x, y)
    plt.plot(x, trend(x), 'r--')
    plt.show()
patient_mrn = str(row.dossier) observation_name = row.servacro observation_value = row.rsltvalue observation_time = row.startdtm observation_unit = row.unitcd try: mapped_observation_name = map_observation_name(observation_name, observation_unit) except: continue mapped_observation_time = map_time(observation_time) mapped_observation_value = map_float_value(observation_value) delta_hours = get_hours_between_datetimes( pcr_sample_times[str(patient_mrn)], str(observation_time)) 9 if delta_hours < -48: continue observation_data_rows.append([ patient_mrn, mapped_observation_name, mapped_observation_time, mapped_observation_value ]) print('Done fetching observations from Oacis') df = sql_query( "SELECT * FROM dw_v01.oacis_lb WHERE " + "resultdtm IS NOT NULL AND longdesc in ('FIO2', 'Sat O2 Art', 'Température') AND " + "specimencollectiondtm > '2020-01-01' AND dossier in (" + ", ".join(patient_mrns) + ")")