Пример #1
0
    def _generate_icd9_lookup(self):
        """Build ``self._diag_to_desc``: diagnosis key -> textual description.

        Each key in ``self._diags`` has its first two characters removed and
        the remainder is looked up in the ICD9 tree loaded from
        ``../lib/icd9/codes.json``. When that lookup raises, the description
        comes from a small hand-maintained table, and finally defaults to
        ``"Not Found"``.
        """
        # Descriptions used when the tree lookup fails for these codes.
        fallback_descriptions = {
            "008": "Intestinal infections due to other organisms",
            "280": "Iron deficiency anemias",
            "284": "Aplastic anemia and other bone marrow failure syndrome",
            "285": "Other and unspecified anemias",
            "286": "Coagulation defects",
            "287": "Purpura and other hemorrhagic conditions",
            "288": "Diseases of white blood cells",
        }

        self._diag_to_desc = {}
        tree = ICD9('../lib/icd9/codes.json')

        for d in self._diags:
            code = d[2:]  # the key minus its first two characters
            try:
                description = tree.find(code).description
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still propagate.
                description = fallback_descriptions.get(code, "Not Found")
            self._diag_to_desc[d] = description
Пример #2
0
def generate_icd9_lookup():
    """Fill ``diag_to_desc`` with a description for every key in ``uniq_diag``.

    Each key has its first two characters removed and the remainder is looked
    up in the ICD9 tree loaded from ``../lib/icd9/codes.json``. When that
    lookup raises, the description comes from a small hand-maintained table,
    and finally defaults to ``"Not Found"``.

    Both ``uniq_diag`` and ``diag_to_desc`` are module-level names; the
    function returns nothing and only mutates ``diag_to_desc``.
    """
    # Descriptions used when the tree lookup fails for these codes.
    fallback_descriptions = {
        "008": "Intestinal infections due to other organisms",
        "280": "Iron deficiency anemias",
        "284": "Aplastic anemia and other bone marrow failure syndrome",
        "285": "Other and unspecified anemias",
        "286": "Coagulation defects",
        "287": "Purpura and other hemorrhagic conditions",
        "288": "Diseases of white blood cells",
    }

    tree = ICD9('../lib/icd9/codes.json')

    for ud in uniq_diag:
        code = ud[2:]  # the key minus its first two characters
        try:
            description = tree.find(code).description
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still propagate.
            description = fallback_descriptions.get(code, "Not Found")
        diag_to_desc[ud] = description
Пример #3
0
def rollup_mimic(mimic_path, mimic_filename, codes_path):
    """
    Roll up the ICD codes of a preprocessed MIMIC CSV and save the result.

    Reads ``mimic_filename`` from ``mimic_path``, applies
    ``rollup_multiple_codes`` to the ``ICD_DIAG`` column with an ICD9 tree
    built from ``codes_path``, stores the per-row result in ``DIAG_ICD_ALL``,
    expands that result into separate columns via ``pd.Series``, and writes
    the frame to ``mimic_processed.csv`` inside ``mimic_path``.
    """
    input_csv = os.path.join(mimic_path, mimic_filename)
    output_csv = os.path.join(mimic_path, "mimic_processed.csv")

    records = pd.read_csv(input_csv)
    icd_tree = ICD9(codes_path)

    rolled_up = records.ICD_DIAG.apply(rollup_multiple_codes, icd_tree=icd_tree)
    records["DIAG_ICD_ALL"] = rolled_up

    # One new column per entry of each row's rolled-up result.
    expanded = records.DIAG_ICD_ALL.apply(pd.Series)
    records = records.assign(**expanded)

    records.to_csv(output_csv)
Пример #4
0
    def _generate_icd9_lookup(self):
        """Build ``self._diag_to_desc``: diagnosis key -> textual description.

        Each key in ``self._diags`` has its first two characters removed and
        the remainder is looked up in the ICD9 tree loaded from
        ``../../lib/icd9/codes.json``. When that lookup raises, the
        description comes from a small hand-maintained table, and finally
        defaults to ``"Not Found"``.
        """
        # Descriptions used when the tree lookup fails for these codes.
        fallback_descriptions = {
            "285.9": "Anemia",
            "287.5": "Thrombocytopenia",
            "285.1": "Acute posthemorrhagic anemia",
        }

        self._diag_to_desc = {}
        tree = ICD9('../../lib/icd9/codes.json')

        for d in self._diags:
            code = d[2:]  # the key minus its first two characters
            try:
                description = tree.find(code).description
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still propagate.
                description = fallback_descriptions.get(code, "Not Found")
            self._diag_to_desc[d] = description
Пример #5
0
import os
import ipdb
from nltk.tokenize import word_tokenize
from icd9 import ICD9
from transformers import AutoConfig, AutoModel, AutoTokenizer
import torch
from tqdm import tqdm
import numpy as np
import sys
sys.path.append("../../pretrain")
from load_umls import UMLS

# Module-level ICD9 tree built from codes.json (path is relative to the working directory).
tree = ICD9('codes.json')
# Use CUDA device index 1 when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:1")
else:
    device = torch.device("cpu")
# log_list[i] == 1 / log2(i + 2) for i = 0..998.
# NOTE(review): presumably rank-discount weights - confirm against the code that uses it.
log_list = 1 / np.log2(np.arange(2, 1001))

# Fixed sizes used by the rest of the script.
batch_size = 512
max_seq_length = 32


def get_icd9_pairs(icd9_set):
    icd9_pairs = {}
    with open('icd9_grp_file.txt', 'r', encoding="utf-8") as infile:
        data = infile.readlines()
        for row in data:
            codes, name = row.strip().split('#')
            name = name.strip()
            codes = codes.strip().split(' ')
            new_codes = set([])
            for code in codes:
                if code in icd9_set:
Пример #6
0
# Directory (relative to the working directory) that receives the pickles.
out_directory = 'data'

# In[330]:

# Write the train/test data, ordered by index, as pickle files.
data_train.sort_index().to_pickle('{}/data_train.pkl'.format(out_directory))
data_test.sort_index().to_pickle('{}/data_test.pkl'.format(out_directory))

# In[331]:

# Write the train/test targets the same way.
target_train.sort_index().to_pickle('{}/target_train.pkl'.format(out_directory))
target_test.sort_index().to_pickle('{}/target_test.pkl'.format(out_directory))

# In[332]:

from icd9 import ICD9
tree = ICD9('codes_pretty_printed.json')

# In[333]:

# Build two parallel lists for the codes 390..458:
#   ids         - zero-based position of each code (code - 390)
#   labels_icd9 - the code's description from the tree, or the description of
#                 the '390-459' node followed by the code when it is missing.
# NOTE(review): range(390, 459) stops at 458 although the fallback node is
# named '390-459' - confirm whether code 459 is excluded on purpose.
labels_icd9 = []
ids = []

for k in range(390, 459, 1):
    ids.append(k - 390)
    node = tree.find(str(k))
    if node is None:
        label = tree.find('390-459').description + str(k)
    else:
        label = node.description
    labels_icd9.append(label)
# Seed NumPy's global random generator with a fixed value.
np.random.seed(12345)

# In[4]:

# Reload the helper modules bound to eu, meu and sdu.
# NOTE(review): these names are imported outside this excerpt.
reload(eu)
reload(meu)
reload(sdu)

# In[6]:

# Add the local 'icd9' directory to the import path before importing ICD9.
sys.path.append('icd9')
from icd9 import ICD9

# feel free to replace with your path to the json file
tree = ICD9('icd9/codes.json')

# ## Configure pandas and matplot lib for nice web printing

# In[7]:

# Display limits: up to 1000 rows, 50 columns and 100 characters per column.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 100

# In[8]:

# Script form of the notebook magic `%matplotlib inline`.
get_ipython().run_line_magic('matplotlib', 'inline')

# ## Load config files, configure logging
Пример #8
0
import csv
from icd9 import ICD9  # not python3 compatible

# Module-level ICD9 tree built from ./codes.json (path is relative to the working directory).
tree = ICD9('./codes.json')


def cluster_codes(dataset_path, level=2):
    """
    levels
    - 1 top (eg 'Endocrine, Nutritional And Metabolic Diseases, And Immunity Disorders')
    - 2 super (eg 'Diseases Of Other Endocrine Glands')
    - 3 basic (eg 'Diabetes mellitus')
    - 4 sub (eg 'Diabetes mellitus without mention of complication')
    - 5 specific (eg 'Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled')
    """
    with open(dataset_path) as data_f:
        data = csv.reader(data_f, delimiter=',')
        diagnosis_codes = {}
        results = []
        for i, line in enumerate(data):
            if i == 0:
                diag_index = line.index('diag_1')
                line.append('diag_1_desc')
                line.append('diag_2_desc')
                line.append('diag_3_desc')
                results.append(line)
                continue
            diag_1 = line[diag_index]
            diag_2 = line[diag_index + 1]
            diag_3 = line[diag_index + 2]
            for j, diag in enumerate([diag_1, diag_2, diag_3]):
Пример #9
0
def rollup_mimic(mimic_path, mimic_filename, codes_path):
    """
    Roll up the ICD codes of a preprocessed MIMIC CSV and save the result.

    Reads ``mimic_filename`` from ``mimic_path``, applies
    ``rollup_multiple_codes`` to the ``ICD_DIAG`` column with an ICD9 tree
    built from ``codes_path``, stores the per-row result in ``DIAG_ICD_ALL``,
    expands that result into separate columns via ``pd.Series``, and writes
    the frame to ``mimic_processed.csv`` inside ``mimic_path``.
    """
    input_csv = os.path.join(mimic_path, mimic_filename)
    output_csv = os.path.join(mimic_path, "mimic_processed.csv")

    records = pd.read_csv(input_csv)
    icd_tree = ICD9(codes_path)

    rolled_up = records.ICD_DIAG.apply(rollup_multiple_codes, icd_tree=icd_tree)
    records["DIAG_ICD_ALL"] = rolled_up

    # One new column per entry of each row's rolled-up result.
    expanded = records.DIAG_ICD_ALL.apply(pd.Series)
    records = records.assign(**expanded)

    records.to_csv(output_csv)


if __name__ == "__main__":
    # Script entry point: roll up one MIMIC CSV.
    # The defaults reproduce the paths that used to be hard-coded here, so
    # running the script without arguments processes the same files as before.
    import argparse

    default_root = ("/Users/michalmalyska/Desktop/Projects/semantic_health_public"
                    "/deep-patient-cohorts")

    cli = argparse.ArgumentParser(
        description="Roll up the ICD codes of a preprocessed MIMIC CSV.")
    cli.add_argument(
        "--mimic-path",
        default=default_root + "/data",
        help="directory holding the input CSV; the output is written here too")
    cli.add_argument(
        "--mimic-filename",
        default="mimic_real_test.csv",
        help="name of the input CSV inside --mimic-path")
    cli.add_argument(
        "--codes-path",
        default=default_root + "/scripts/icd_rollup/codes.json",
        help="path to the ICD9 codes JSON file")
    options = cli.parse_args()

    rollup_mimic(options.mimic_path, options.mimic_filename, options.codes_path)
Пример #10
0
def create_descriptive_recordset(input_data, one_hot_map):
    """Turn records of one-hot column names into descriptive named tuples.

    Args:
        input_data: Iterable of records; each record is an iterable of
            column-name strings (for example names starting with ``gen_`` or
            ``dia_``).
        one_hot_map: Object whose ``keys()`` are the known one-hot column
            names. Only the keys are used: they decide which fields the
            output tuples have and which diagnosis codes get a description.

    Returns:
        A ``(records, fields)`` pair. ``fields`` is the sorted list of field
        names enabled by ``one_hot_map``. ``records`` holds one
        ``DescriptiveRecord`` named tuple per input record, whose fields are
        tuples of the values found for that field.

    Raises:
        KeyError: If a record contains a ``dia_`` column that is not a key of
            ``one_hot_map``, or a column whose prefix never occurred among
            the keys of ``one_hot_map``.
    """
    # Column-name prefix -> name of the record field it feeds.
    descriptors = {
        "admloc_": "admission_location",
        "disloc_": "discharge_location",
        "ins_": "insurance",
        "lang_": "language",
        "rel_": "religion",
        "mar_": "marital_status",
        "eth_": "ethnicity",
        "gen_": "gender",
        "dia_": "diagnoses"
    }
    # Diagnosis columns are matched on "dia_" but the code starts after the
    # longer "dia_D_" prefix.
    code_offset = len("dia_D_")

    # Pass 1: find the enabled fields and describe every diagnosis column.
    enabled_descriptors = set()
    tree = ICD9()
    diagnostic_codes_map = {}
    for key in one_hot_map.keys():
        for prefix, field in descriptors.items():
            if key.startswith(prefix):
                enabled_descriptors.add(field)
        if key.startswith("dia_"):
            condition = key[code_offset:]
            try:
                description = tree.find(condition).description
            except Exception:
                # Lookup failed: fall back to the bare code. Not a bare
                # ``except`` so KeyboardInterrupt/SystemExit still propagate.
                diagnostic_codes_map[key] = condition
            else:
                diagnostic_codes_map[key] = "{}-{}".format(
                    condition, description)

    field_names = sorted(enabled_descriptors)
    DescriptiveRecord = namedtuple("DescriptiveRecord", field_names)

    # Pass 2: convert every record.
    sparse_descriptive_records = []
    for count, record in enumerate(input_data, start=1):
        record_lists = {name: [] for name in field_names}

        for column in record:
            for prefix, field in descriptors.items():
                if not column.startswith(prefix):
                    continue
                if prefix == "dia_":
                    # NOTE(review): diagnostic_codes_map values already begin
                    # with the code, so the code appears twice in this
                    # string. Kept as-is for compatibility - confirm whether
                    # that is intended.
                    record_lists[field].append("{}-{}".format(
                        column[code_offset:], diagnostic_codes_map[column]))
                else:
                    record_lists[field].append(column[len(prefix):])

        sparse_descriptive_records.append(
            DescriptiveRecord(*[tuple(record_lists[name])
                                for name in field_names]))

        # Report progress
        if count % 1000 == 0:
            print("{} records processed so far.".format(count))

    return sparse_descriptive_records, field_names