def _generate_icd9_lookup(self):
    """Build ``self._diag_to_desc``, a map from diagnosis key to description.

    Every entry of ``self._diags`` is looked up in the ICD9 tree loaded from
    ``../lib/icd9/codes.json`` after dropping the entry's first two
    characters. When that lookup raises, the description is taken from a
    small hand-written table of three-digit categories, and otherwise set to
    ``"Not Found"``.
    """
    # Hand-written descriptions used only when the tree lookup fails.
    fallback_descriptions = {
        "008": "Intestinal infections due to other organisms",
        "280": "Iron deficiency anemias",
        "284": "Aplastic anemia and other bone marrow failure syndrome",
        "285": "Other and unspecified anemias",
        "286": "Coagulation defects",
        "287": "Purpura and other hemorrhagic conditions",
        "288": "Diseases of white blood cells",
    }

    self._diag_to_desc = {}
    tree = ICD9('../lib/icd9/codes.json')
    for d in self._diags:
        code = d[2:]  # the first two characters of the key are not part of the code
        try:
            description = tree.find(code).description
        except Exception:
            # Best-effort lookup: any failure (e.g. find() yielding no node)
            # falls back to the table. ``Exception`` rather than a bare
            # ``except`` so Ctrl-C and interpreter exit are not swallowed.
            description = fallback_descriptions.get(code, "Not Found")
        self._diag_to_desc[d] = description
def generate_icd9_lookup():
    """Generate description from ICD9 code.

    Fills the module-level ``diag_to_desc`` mapping with one entry per item
    of the module-level ``uniq_diag``. Each item is looked up in the ICD9
    tree loaded from ``../lib/icd9/codes.json`` after dropping its first two
    characters. When that lookup raises, the description comes from a small
    hand-written table of three-digit categories, and otherwise is
    ``"Not Found"``.
    """
    # Hand-written descriptions used only when the tree lookup fails.
    fallback_descriptions = {
        "008": "Intestinal infections due to other organisms",
        "280": "Iron deficiency anemias",
        "284": "Aplastic anemia and other bone marrow failure syndrome",
        "285": "Other and unspecified anemias",
        "286": "Coagulation defects",
        "287": "Purpura and other hemorrhagic conditions",
        "288": "Diseases of white blood cells",
    }

    tree = ICD9('../lib/icd9/codes.json')
    for ud in uniq_diag:
        code = ud[2:]  # the first two characters of the key are not part of the code
        try:
            description = tree.find(code).description
        except Exception:
            # Best-effort lookup: any failure (e.g. find() yielding no node)
            # falls back to the table. ``Exception`` rather than a bare
            # ``except`` so Ctrl-C and interpreter exit are not swallowed.
            description = fallback_descriptions.get(code, "Not Found")
        diag_to_desc[ud] = description
def rollup_mimic(mimic_path, mimic_filename, codes_path):
    """
    Rolls up all the codes in mimic data from our preprocessing.

    Reads ``mimic_filename`` from ``mimic_path``, applies
    ``rollup_multiple_codes`` to the ``ICD_DIAG`` column using an ICD9 tree
    built from ``codes_path``, stores the result in ``DIAG_ICD_ALL``, expands
    that result into additional columns, and writes everything to
    ``mimic_processed.csv`` inside ``mimic_path``.
    """
    source_csv = os.path.join(mimic_path, mimic_filename)
    frame = pd.read_csv(source_csv)

    icd_tree = ICD9(codes_path)
    frame["DIAG_ICD_ALL"] = frame.ICD_DIAG.apply(
        rollup_multiple_codes, icd_tree=icd_tree)

    # Spread each rolled-up result into columns of its own.
    expanded = frame.DIAG_ICD_ALL.apply(pd.Series)
    frame = frame.assign(**expanded)

    target_csv = os.path.join(mimic_path, "mimic_processed.csv")
    frame.to_csv(target_csv)
def _generate_icd9_lookup(self):
    """Build ``self._diag_to_desc``, a map from diagnosis key to description.

    Every entry of ``self._diags`` is looked up in the ICD9 tree loaded from
    ``../../lib/icd9/codes.json`` after dropping the entry's first two
    characters. When that lookup raises, the description is taken from a
    small hand-written table of codes, and otherwise set to ``"Not Found"``.
    """
    # Hand-written descriptions used only when the tree lookup fails.
    fallback_descriptions = {
        "285.9": "Anemia",
        "287.5": "Thrombocytopenia",
        "285.1": "Acute posthemorrhagic anemia",
    }

    self._diag_to_desc = {}
    tree = ICD9('../../lib/icd9/codes.json')
    for d in self._diags:
        code = d[2:]  # the first two characters of the key are not part of the code
        try:
            description = tree.find(code).description
        except Exception:
            # Best-effort lookup: any failure (e.g. find() yielding no node)
            # falls back to the table. ``Exception`` rather than a bare
            # ``except`` so Ctrl-C and interpreter exit are not swallowed.
            description = fallback_descriptions.get(code, "Not Found")
        self._diag_to_desc[d] = description
import os import ipdb from nltk.tokenize import word_tokenize from icd9 import ICD9 from transformers import AutoConfig, AutoModel, AutoTokenizer import torch from tqdm import tqdm import numpy as np import sys sys.path.append("../../pretrain") from load_umls import UMLS tree = ICD9('codes.json') device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") log_list = 1 / np.log2(list(range(2, 1001, 1))) batch_size = 512 max_seq_length = 32 def get_icd9_pairs(icd9_set): icd9_pairs = {} with open('icd9_grp_file.txt', 'r', encoding="utf-8") as infile: data = infile.readlines() for row in data: codes, name = row.strip().split('#') name = name.strip() codes = codes.strip().split(' ') new_codes = set([]) for code in codes: if code in icd9_set:
out_directory = 'data'

# In[330]:

# Persist the feature splits, each sorted by index.
for frame, filename in (
    (data_train, '/data_train.pkl'),
    (data_test, '/data_test.pkl'),
):
    frame.sort_index().to_pickle(out_directory + filename)

# In[331]:

# Persist the target splits the same way.
for frame, filename in (
    (target_train, '/target_train.pkl'),
    (target_test, '/target_test.pkl'),
):
    frame.sort_index().to_pickle(out_directory + filename)

# In[332]:

from icd9 import ICD9

tree = ICD9('codes_pretty_printed.json')

# In[333]:

# One label per integer code in 390..458; ``ids`` holds the zero-based
# position of each code within that range.
labels_icd9 = []
ids = []
for k in range(390, 459):
    ids.append(k - 390)
    node = tree.find(str(k))
    if node is None:
        # Code missing from the tree: use the description of the enclosing
        # '390-459' range with the code number appended.
        labels_icd9.append(tree.find('390-459').description + str(k))
    else:
        labels_icd9.append(node.description)
np.random.seed(12345) # In[4]: reload(eu) reload(meu) reload(sdu) # In[6]: sys.path.append('icd9') from icd9 import ICD9 # feel free to replace with your path to the json file tree = ICD9('icd9/codes.json') # ## Configure pandas and matplot lib for nice web printing # In[7]: pd.options.display.max_rows = 1000 pd.options.display.max_columns = 50 pd.options.display.max_colwidth = 100 # In[8]: get_ipython().run_line_magic('matplotlib', 'inline') # ## Load config files, configure logging
import csv from icd9 import ICD9 # not python3 compatible tree = ICD9('./codes.json') def cluster_codes(dataset_path, level=2): """ levels - 1 top (eg 'Endocrine, Nutritional And Metabolic Diseases, And Immunity Disorders') - 2 super (eg 'Diseases Of Other Endocrine Glands') - 3 basic (eg 'Diabetes mellitus') - 4 sub (eg 'Diabetes mellitus without mention of complication') - 5 specific (eg 'Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled') """ with open(dataset_path) as data_f: data = csv.reader(data_f, delimiter=',') diagnosis_codes = {} results = [] for i, line in enumerate(data): if i == 0: diag_index = line.index('diag_1') line.append('diag_1_desc') line.append('diag_2_desc') line.append('diag_3_desc') results.append(line) continue diag_1 = line[diag_index] diag_2 = line[diag_index + 1] diag_3 = line[diag_index + 2] for j, diag in enumerate([diag_1, diag_2, diag_3]):
def rollup_mimic(mimic_path, mimic_filename, codes_path):
    """
    Rolls up all the codes in mimic data from our preprocessing.

    Args:
        mimic_path: directory holding the input CSV; the output file
            ``mimic_processed.csv`` is written to the same directory.
        mimic_filename: name of the input CSV inside ``mimic_path``. It must
            have an ``ICD_DIAG`` column.
        codes_path: path to the ICD9 codes JSON used to build the tree.
    """
    mimic_data = pd.read_csv(os.path.join(mimic_path, mimic_filename))
    tree = ICD9(codes_path)
    mimic_data["DIAG_ICD_ALL"] = mimic_data.ICD_DIAG.apply(
        rollup_multiple_codes, icd_tree=tree)
    # Spread each rolled-up result into columns of its own.
    mimic_data = mimic_data.assign(**mimic_data.DIAG_ICD_ALL.apply(pd.Series))
    mimic_data.to_csv(os.path.join(mimic_path, "mimic_processed.csv"))


if __name__ == "__main__":
    # NOTE: these paths point at the original author's machine; adjust them
    # before running the script elsewhere.
    rollup_mimic(
        "/Users/michalmalyska/Desktop/Projects/semantic_health_public/deep-patient-cohorts/data",
        "mimic_real_test.csv",
        ("/Users/michalmalyska/Desktop/Projects/semantic_health_public/deep-patient-cohorts"
         "/scripts/icd_rollup/codes.json"),
    )
def create_descriptive_recordset(input_data, one_hot_map):
    """Turn one-hot style records into named tuples of readable values.

    Args:
        input_data: iterable of records, each an iterable of column-name
            strings (presumably the columns that are set for that record;
            confirm against the caller).
        one_hot_map: mapping whose keys are the known column names. Only the
            keys are used. Keys are matched against the prefixes in
            ``descriptors`` to decide which fields the output carries.

    Returns:
        A pair ``(records, field_names)``. ``records`` is a list of
        ``DescriptiveRecord`` named tuples, one per input record, where each
        field holds a tuple of the values found for it. ``field_names`` is the
        sorted list of fields present.

    Raises:
        KeyError: if a record has a column with a known prefix that never
            appeared among the keys of ``one_hot_map``.
    """
    # Column-name prefix -> output field name.
    descriptors = {
        "admloc_": "admission_location",
        "disloc_": "discharge_location",
        "ins_": "insurance",
        "lang_": "language",
        "rel_": "religion",
        "mar_": "marital_status",
        "eth_": "ethnicity",
        "gen_": "gender",
        "dia_": "diagnoses"
    }
    # Diagnosis columns are stripped of a "dia_D_"-length prefix although they
    # are matched on "dia_" only.
    diagnosis_prefix_len = len("dia_D_")

    tree = ICD9()
    enabled_descriptors = set()
    diagnostic_codes_map = {}
    for key in one_hot_map:
        for prefix, field in descriptors.items():
            if key.startswith(prefix):
                enabled_descriptors.add(field)
        if key.startswith("dia_"):
            condition = key[diagnosis_prefix_len:]
            try:
                diagnostic_codes_map[key] = "{}-{}".format(
                    condition, tree.find(condition).description)
            except Exception:
                # Best-effort: a code the tree cannot describe is kept as the
                # bare code. ``Exception`` rather than a bare ``except`` so
                # Ctrl-C and interpreter exit are not swallowed.
                diagnostic_codes_map[key] = condition

    # Sort once; the field order is the same for every record.
    field_names = sorted(enabled_descriptors)
    DescriptiveRecord = namedtuple("DescriptiveRecord", field_names)

    sparse_descriptive_records = []
    for i, record in enumerate(input_data):
        record_lists = {field: [] for field in field_names}

        for column in record:
            for prefix, field in descriptors.items():
                if not column.startswith(prefix):
                    continue
                if prefix == "dia_":
                    condition = column[diagnosis_prefix_len:]
                    # NOTE(review): diagnostic_codes_map values already start
                    # with the code, so this yields "code-code-description".
                    # Kept as is because callers may rely on the format.
                    record_lists[field].append("{}-{}".format(
                        condition, diagnostic_codes_map[column]))
                else:
                    record_lists[field].append(column[len(prefix):])

        packaged_record = [tuple(record_lists[field]) for field in field_names]
        sparse_descriptive_records.append(DescriptiveRecord(*packaged_record))

        # Report progress
        if (i + 1) % 1000 == 0:
            print("{} records processed so far.".format(i + 1))

    return sparse_descriptive_records, list(field_names)