Пример #1
0
def compare_labs_by(conn, col, val_pos, val_neg, lab_name, min_value=0, max_value=999999, query_tail=''):
  """Plot the values of one lab test for two groups of patients.

  The groups are the patients whose ``patient_data.<col>`` equals
  ``val_pos`` and ``val_neg`` respectively. Only rows with
  ``lab_result_status = 'resulted'`` are read.

  Args:
    conn: database handle forwarded to ``sql_fetch_all``.
    col: name of the ``patient_data`` column that defines the groups.
    val_pos: column value selecting the first group (string).
    val_neg: column value selecting the second group (string).
    lab_name: value matched against ``lab_data.lab_name``.
    min_value: forwarded unchanged to ``plot_compare_kde``.
    max_value: forwarded unchanged to ``plot_compare_kde``.
    query_tail: raw SQL appended after the group condition.

  NOTE: every argument is spliced into the SQL text as-is, so callers
  must only pass trusted values.
  """
  base_query = (
    "SELECT lab_result_value from lab_data "
    " INNER JOIN patient_data ON "
    " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " lab_data.lab_name = '" + lab_name + "' AND "
    " lab_data.lab_result_status = 'resulted' AND "
    " patient_data." + col + " = "
  )

  def fetch_group_values(group_value):
    # One query per group; every result value is coerced to float.
    rows = sql_fetch_all(conn, base_query + " '" + group_value + "' " + query_tail)
    return [float(row[0]) for row in rows]

  values_pos = fetch_group_values(val_pos)
  values_neg = fetch_group_values(val_neg)

  plot_compare_kde(
    lab_name, col, val_pos, val_neg,
    values_pos, values_neg, min_value, max_value,
  )
Пример #2
0
def compare_obs_by(conn, col, val_pos, val_neg, obs_name, min_value=0, max_value=999999, query_tail=''):
  """Plot the values of one observation for two groups of patients.

  The groups are the patients whose ``patient_data.<col>`` equals
  ``val_pos`` and ``val_neg`` respectively. Rows with a NULL
  ``observation_value`` are excluded by the query.

  Args:
    conn: database handle forwarded to ``sql_fetch_all``.
    col: name of the ``patient_data`` column that defines the groups.
    val_pos: column value selecting the first group (string).
    val_neg: column value selecting the second group (string).
    obs_name: value matched against ``observation_data.observation_name``.
    min_value: forwarded unchanged to ``plot_compare_kde``.
    max_value: forwarded unchanged to ``plot_compare_kde``.
    query_tail: raw SQL appended after the group condition.

  NOTE: every argument is spliced into the SQL text as-is, so callers
  must only pass trusted values.
  """
  query_prefix = (
    "SELECT observation_value from observation_data "
    " INNER JOIN patient_data ON "
    " observation_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " observation_data.observation_name = '" + obs_name + "' AND "
    " observation_data.observation_value NOT NULL AND "
    " patient_data." + col + " = "
  )

  # Same query for both groups; only the quoted group value differs.
  values_by_group = []
  for group_value in (val_pos, val_neg):
    rows = sql_fetch_all(conn, query_prefix + " '" + group_value + "' " + query_tail)
    values_by_group.append([float(row[0]) for row in rows])
  values_pos, values_neg = values_by_group

  plot_compare_kde(
    obs_name, col, val_pos, val_neg,
    values_pos, values_neg, min_value, max_value,
  )
Пример #3
0
def compare_drugs_by_death(conn, drug_names):
    """Compute the odds ratio of death for exposure to the given drugs.

    Only patients with ``patient_covid_status = 'positive'`` are considered.
    A patient is "exposed" when at least one ``drug_data`` row with a
    ``drug_name`` in ``drug_names`` exists for them, and "dead" when
    ``patient_vital_status = 'dead'``.

    Args:
        conn: database handle forwarded to ``sql_fetch_all``.
        drug_names: iterable of drug name strings.

    Returns:
        ``[odds_ratio, lower_ci, upper_ci, n_exposed]`` where ``odds_ratio``
        is rounded to 2 decimals and the bounds are the 95% confidence
        interval (1.96 standard errors on the log scale).
        ``[-1, None, None, None]`` when no positive patient was exposed.
    """
    query = ("SELECT patient_site_uid from patient_data WHERE "
             "patient_covid_status = 'positive'")
    all_patients = {rec[0] for rec in sql_fetch_all(conn, query)}

    query = ("SELECT patient_site_uid from patient_data WHERE "
             "patient_covid_status = 'positive' AND patient_vital_status = 'dead'")
    dead_patients = {rec[0] for rec in sql_fetch_all(conn, query)}
    alive_patients = all_patients - dead_patients

    # Double embedded single quotes so a name such as "St John's wort"
    # stays a valid SQL string literal instead of breaking the query.
    escaped_names = [name.replace("'", "''") for name in drug_names]
    query = ("SELECT drug_data.patient_site_uid from drug_data INNER JOIN "
             " patient_data ON patient_data.patient_site_uid = "
             " drug_data.patient_site_uid WHERE "
             " patient_data.patient_covid_status = 'positive' AND "
             " drug_name IN ('" + "', '".join(escaped_names) + "')")
    exposed_patients = {rec[0] for rec in sql_fetch_all(conn, query)}

    # Sentinel result kept for callers that test ``result[0] == -1``.
    if not exposed_patients:
        return [-1, None, None, None]

    nonexposed_patients = all_patients - exposed_patients

    # 2x2 table cells, each with 0.5 added so that no cell is zero
    # (avoids division by zero in the ratio and in the standard error).
    a = 0.5 + len(exposed_patients & dead_patients)
    b = 0.5 + len(exposed_patients & alive_patients)
    c = 0.5 + len(nonexposed_patients & dead_patients)
    d = 0.5 + len(nonexposed_patients & alive_patients)

    odds_ratio = (a / c) / (b / d)

    # Take the log of the UNROUNDED ratio: rounding first can produce 0.0
    # for small ratios, and log(0) is undefined; it would also skew the CI.
    log_or = log(odds_ratio)
    se_log_or = sqrt(1 / a + 1 / b + 1 / c + 1 / d)
    lower_ci = exp(log_or - 1.96 * se_log_or)
    upper_ci = exp(log_or + 1.96 * se_log_or)

    return [round(odds_ratio, 2), lower_ci, upper_ci, len(exposed_patients)]
Пример #4
0
def compute_drug_odds_ratios(conn):
    """Print the death odds ratio of every drug, sorted by ascending ratio.

    Each distinct ``drug_name`` found in ``drug_data`` is evaluated on its
    own with ``compare_drugs_by_death``. A drug is left out when its name
    contains a single quote, when the comparison returns the ``-1``
    sentinel, or when fewer than 5 patients were exposed.

    Args:
        conn: database handle forwarded to the query helpers.
    """
    rows = sql_fetch_all(conn, "SELECT drug_name from drug_data")
    distinct_drugs = list(set([row[0] for row in rows]))

    results = []
    for name in distinct_drugs:
        # Names containing a quote would break the string-built SQL downstream.
        if "'" in name:
            continue
        stats = compare_drugs_by_death(conn, [name])
        # stats[-1] is the exposed count; it is only read when the
        # sentinel check did not already reject the drug.
        if stats[0] == -1 or stats[-1] < 5:
            continue
        results.append([name, stats])

    for name, stats in sorted(results, key=lambda entry: entry[1][0]):
        print('%s - OR = %.2f (%.2f-%.2f), N exposed = %d' %
              (name, stats[0], stats[1], stats[2], stats[3]))
Пример #5
0
def summarize_variable(table, variable):
    """Print summary statistics of one column for COVID-positive patients.

    Prints the mean, the standard deviation and the proportion of values
    strictly above 70. NULL values are ignored.

    The query runs on the module-level ``conn`` (it is not a parameter).
    ``table`` and ``variable`` are spliced into the SQL text as-is, so
    only trusted identifiers may be passed.

    Args:
        table: name of the table to read. Any table other than
            ``patient_data`` is joined to ``patient_data`` on
            ``patient_site_uid``.
        variable: name of the column to summarize.
    """
    query = "SELECT %s FROM %s " % (variable, table)

    if table == 'patient_data':
        query += "WHERE patient_covid_status = 'positive'"
    else:
        # Join on the requested table; a hard-coded ``lab_data`` here would
        # produce invalid SQL for every other table.
        query += (" INNER JOIN patient_data ON "
                  " %s.patient_site_uid = patient_data.patient_site_uid WHERE "
                  " patient_data.patient_covid_status = 'positive'" % table)

    values = np.asarray(
        [row[0] for row in sql_fetch_all(conn, query) if row[0] is not None])

    print("Summary for variable %s" % variable)

    # Without this guard an empty result divides by zero below.
    if len(values) == 0:
        print("No non-null values found")
        return

    print("Mean: %.2f" % np.mean(values))
    print("Std: %.2f" % np.std(values))

    pct_above70 = np.count_nonzero(values > 70) / len(values)
    print("Proportion above 70: %.2f" % pct_above70)
Пример #6
0
# Open the SQLite database file stored under SQLITE_DIRECTORY.
db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db')
conn = sqlite3.connect(db_file_name)

# NOTE(review): ``inclusion_flag`` is defined outside this excerpt and is
# appended verbatim to each WHERE clause -- presumably an extra
# " AND ..." condition; confirm at its definition.

# Patients with at least one intensive-care or high-dependency episode.
query_icu = (
    "SELECT episode_data.patient_site_uid from episode_data INNER JOIN "
    " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " (episode_data.episode_unit_type = 'intensive_care_unit' OR "
    "  episode_data.episode_unit_type = 'high_dependency_unit')  "
    + inclusion_flag
)

# Patients with a diagnosis row of type 'death'.
query_deaths = (
    "SELECT diagnosis_data.patient_site_uid from diagnosis_data INNER JOIN "
    " patient_data ON diagnosis_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " diagnosis_data.diagnosis_type = 'death' "
    + inclusion_flag
)

# Patient ids are normalized to strings before being collected into sets.
icu_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_icu)}
death_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_deaths)}

# Start times of ward, emergency-room and intensive-care episodes.
query = (
    "SELECT episode_data.patient_site_uid, episode_start_time from episode_data INNER JOIN "
    " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " (episode_data.episode_unit_type = 'inpatient_ward' OR "
    " episode_data.episode_unit_type = 'emergency_room' OR "
    " episode_data.episode_unit_type = 'intensive_care_unit') "
    + inclusion_flag
)

res = sql_fetch_all(conn, query)

# Accumulators filled by the loop that follows.
eligible_patients = set()
eligible_episodes = dict()

for patient_id, episode_start_time in res:
Пример #7
0
# Open a connection to the SQLite database file stored under SQLITE_DIRECTORY.
db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db')
conn = sqlite3.connect(db_file_name)

def map_age(age):
  """Return ``age`` as a float, or the fallback value 60 when it is None."""
  return 60 if age is None else float(age)

def map_sex(sex):
  """Encode sex as an indicator: 1 for ``'male'``, 0 for any other value."""
  return 1 if sex == 'male' else 0

# Age and sex of every COVID-positive patient.
query_info = (
    "SELECT patient_site_uid, patient_age, patient_sex from patient_data WHERE "
    " patient_data.patient_covid_status = 'positive'"
)

# Maps str(patient id) -> [age via map_age, sex indicator via map_sex].
pt_infos = {
    str(row[0]): [map_age(row[1]), map_sex(row[2])]
    for row in sql_fetch_all(conn, query_info)
}

# Positive patients with an intensive-care or high-dependency episode.
query_icu = (
    "SELECT episode_data.patient_site_uid from episode_data INNER JOIN "
    " patient_data ON episode_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " (episode_data.episode_unit_type = 'intensive_care_unit' OR "
    "  episode_data.episode_unit_type = 'high_dependency_unit') AND "
    " patient_data.patient_covid_status = 'positive'"
)

# Positive patients with a diagnosis row of type 'death'.
query_deaths = (
    "SELECT diagnosis_data.patient_site_uid from diagnosis_data INNER JOIN "
    " patient_data ON diagnosis_data.patient_site_uid = patient_data.patient_site_uid WHERE "
    " diagnosis_data.diagnosis_type = 'death' AND "
    " patient_data.patient_covid_status = 'positive'"
)

# Patient ids are normalized to strings, matching the keys of pt_infos.
icu_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_icu)}
death_pt_ids = {str(row[0]) for row in sql_fetch_all(conn, query_deaths)}
Пример #8
0
  
  res = sql_fetch_all(conn, query + " '" + val_neg + "' " + query_tail)
  
  values_neg = [float(value[0]) for value in res]
  
  plot_compare_kde(obs_name, col, val_pos, val_neg, values_pos, \
    values_neg, min_value, max_value)

# 5x5 grid of panels on a 15x15 figure.
fig, axs = plt.subplots(5, 5)
fig.set_figheight(15)
fig.set_figwidth(15)

db_file_name = os.path.join(SQLITE_DIRECTORY, 'covidb_version-1.0.0.db')
conn = sqlite3.connect(db_file_name)

# The result of this query is not used in the visible part of the script.
res = sql_fetch_all(conn, "SELECT * FROM drug_data WHERE drug_name LIKE '%parin%' AND drug_roa='sc'")

# First row of the grid: (lab name, min_value, max_value) per column,
# listed left to right.
_first_row_labs = (
    ('lymphocyte_count', 0.1, 10),
    ('c_reactive_protein', 50, 350),
    ('d_dimer', 100, 6000),
    ('mean_platelet_volume', 8, 12),
    ('phosphate', 0.5, 2.5),
)

for _column, (_lab_name, _low, _high) in enumerate(_first_row_labs):
    # Make the target panel current before the helper draws.
    plt.sca(axs[0, _column])
    compare_labs_by_covid(conn, _lab_name, min_value=_low, max_value=_high)
Пример #9
0
def correlate_labs(conn):
    """Scatter-plot D-dimer against the venous-arterial pCO2 difference.

    Steps, all restricted to COVID-positive patients and resulted labs:

    1. Pair each arterial pCO2 sample with every venous pCO2 sample of the
       same patient taken within 2 hours of it, keeping only pairs whose
       difference (venous - arterial) is non-negative.
    2. Match each D-dimer sample with every pair of the same patient in
       which the arterial or the venous sample lies within 24 hours of it.
    3. Print the number of matched points and of distinct patients, then
       plot D-dimer (x) against the pair's pCO2 difference (y) with a
       least-squares line.

    With fewer than two matched points no line can be fitted, so the
    function returns after printing the counts, without plotting.

    Args:
        conn: database handle forwarded to ``sql_fetch_all``.
    """
    def lab_query(lab_name, sample_site=None):
        # Build the shared SELECT; the sample-site filter is optional.
        site_clause = ""
        if sample_site is not None:
            site_clause = " lab_data.lab_sample_site = '" + sample_site + "' AND "
        return ("SELECT patient_data.patient_site_uid, "
                " lab_sample_time, lab_result_value from lab_data "
                " INNER JOIN patient_data ON "
                " lab_data.patient_site_uid = patient_data.patient_site_uid WHERE "
                " lab_data.lab_name = '" + lab_name + "' AND "
                + site_clause +
                " lab_data.lab_result_status = 'resulted' AND "
                " patient_data.patient_covid_status = 'positive'")

    arterial_pco2s = sql_fetch_all(conn, lab_query('pco2', 'arterial_blood'))
    venous_pco2s = sql_fetch_all(conn, lab_query('pco2', 'venous_blood'))
    d_dimers = sql_fetch_all(conn, lab_query('d_dimer'))

    # Group venous samples by patient so each arterial sample is compared
    # only with samples of the same patient, not with every row.
    venous_by_patient = {}
    for venous_pco2 in venous_pco2s:
        venous_by_patient.setdefault(venous_pco2[0], []).append(venous_pco2)

    # patient id -> list of (arterial time, venous time, venous - arterial)
    pairs_by_patient = {}
    for art_id, art_time, art_value in arterial_pco2s:
        for _, ven_time, ven_value in venous_by_patient.get(art_id, ()):
            av_pco2 = ven_value - art_value
            if av_pco2 < 0:
                continue
            # abs() matches the 24-hour check below; without it a signed
            # time difference would accept arbitrarily distant samples.
            if abs(get_hours_between_datetimes(art_time, ven_time)) < 2:
                pairs_by_patient.setdefault(art_id, []).append(
                    (art_time, ven_time, av_pco2))

    x = []
    y = []
    ids = []

    for dd_id, dd_time, dd_value in d_dimers:
        for art_time, ven_time, av_pco2 in pairs_by_patient.get(dd_id, ()):
            hours_to_arterial = get_hours_between_datetimes(dd_time, art_time)
            hours_to_venous = get_hours_between_datetimes(dd_time, ven_time)
            if abs(hours_to_arterial) < 24 or abs(hours_to_venous) < 24:
                x.append(dd_value)
                # Use the difference stored with THIS pair, not a value
                # left over from the pairing loop above.
                y.append(av_pco2)
                ids.append(dd_id)

    print(len(x))
    print(len(np.unique(ids)))

    # np.polyfit raises on empty input and a line through a single point
    # is undetermined, so there is nothing meaningful to plot.
    if len(x) < 2:
        return

    x = np.asarray(x)
    y = np.asarray(y)

    # Degree-1 least-squares fit, drawn as a dashed red line.
    z = np.polyfit(x, y, 1)
    p = np.poly1d(z)

    plt.scatter(x, y)
    plt.plot(x, p(x), 'r--')
    plt.show()