Пример #1
0
def get_recent_lab_values(empi, date):
    """Return the most recent result of each lab group recorded before `date`.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.
        date: cutoff; only labs whose parsed 'Seq_Date_Time' is strictly
            earlier than this value are considered.

    Returns:
        dict mapping each lab 'Group_Id' to a `(lab_date, result)` tuple taken
        from the latest qualifying lab of that group.  The dict is empty when
        the patient record has no 'Lab' entry.
    """
    p = loader.get_patient_by_EMPI(empi)
    lab_latest = {}
    if 'Lab' not in p.keys():
        return lab_latest
    for lab in p['Lab']:
        raw_date = lab['Seq_Date_Time']
        if not raw_date:
            continue
        # Parse once and reuse the result.  Assumes parse_date can yield None
        # for a timestamp it cannot read (TODO confirm); such labs are skipped
        # instead of failing on the date comparison below.
        lab_date = extract_data.parse_date(raw_date)
        if lab_date is None or lab_date >= date:
            continue
        group_id = lab['Group_Id']
        # Keep the most recent test value; on equal dates the first one seen stays.
        if group_id not in lab_latest or lab_date > lab_latest[group_id][0]:
            lab_latest[group_id] = (lab_date, lab['Result'])
    return lab_latest
Пример #2
0
def main():
    empi = "FAKE_EMPI_385" # testing a single patient
    symptoms_regexes = getSymptomsRegexes()
    person = loader.get_patient_by_EMPI(empi)
    operation_date = build_graphs.get_operation_date(person)
    note_types = ['Car', 'Lno']
    person_pos_history = {}
    person_neg_history = {}
    sec_per_day = 24 * 60 * 60
    for note_type in note_types:
        print 'Examining ' + note_type + ' Notes for Patient ' + empi
        date_key = extract_data.get_date_key(note_type)
        if note_type in person.keys() and date_key != None:
            for i in range(len(person[note_type])):
                print '\tNote' + str(i)
                doc = person[note_type][i]
                date = extract_data.parse_date(doc[date_key])
                if date != None:
                    delta_days = (date - operation_date).total_seconds() / sec_per_day
                    for sym in symptoms_regexes:
                        normal, neg_pre, neg_suff = [bool(x.search(doc['free_text'])) for x in symptoms_regexes[sym]]
                        if neg_pre or neg_suff:
                            if sym in person_neg_history:
                                person_neg_history[sym].append(delta_days)
                            else:
                                person_neg_history[sym] = [delta_days]
                            print '\t\tNegative,' + sym + ',' + str(delta_days)
                        elif normal:
                            if sym in person_pos_history:
                                person_pos_history[sym].append(delta_days)
                            else:
                                person_pos_history[sym] = [delta_days]
                            print '\t\tPositive,' + sym + ',' + str(delta_days)
    return person_pos_history, person_neg_history
Пример #3
0
    def get_sent_vector(self, empi):
        """Build one flat vector from a patient's latest pre-operation notes.

        Notes of type `self.note_type` dated strictly before the operation are
        ordered from closest to the operation to furthest, and the first
        `self.max_notes` of them are kept.  The list is padded with empty
        strings up to `self.max_notes` entries, each entry is converted with
        `self.get_sent_vector_from_doc`, and the results are flattened.

        Args:
            empi: patient identifier handed to `loader.get_patient_by_EMPI`.

        Returns:
            1-D numpy array holding the per-note vectors back to back.
        """
        patient = loader.get_patient_by_EMPI(empi)
        operation_date = extract_data.get_operation_date(patient)
        date_key = extract_data.get_date_key(self.note_type)

        notes = []
        if date_key is not None and self.note_type in patient.keys():
            docs = patient[self.note_type]
            # (time before operation, index) pairs: sorting them puts the note
            # closest to the operation first, ties resolved by original order.
            time_idx_pairs = []
            for idx, doc in enumerate(docs):
                date = extract_data.parse_date(doc[date_key])
                if date is not None and date < operation_date:
                    time_idx_pairs.append((operation_date - date, idx))
            time_idx_pairs.sort()
            notes = [docs[idx]['free_text'] for _, idx in time_idx_pairs[:self.max_notes]]

        # ensure that notes vector length is equal to max_notes
        if len(notes) < self.max_notes:
            notes.extend([''] * (self.max_notes - len(notes)))

        # Turn notes into Doc Vectors.  A list is built explicitly because
        # np.array needs a real sequence, not a lazy map object.
        vectors = [self.get_sent_vector_from_doc(note) for note in notes]
        return np.array(vectors).flatten()
Пример #4
0
    def get_sent_vector(self, empi):
        """Build one flat vector from a patient's latest pre-operation notes.

        Notes of type `self.note_type` dated strictly before the operation are
        ordered from closest to the operation to furthest, and the first
        `self.max_notes` of them are kept.  The list is padded with empty
        strings up to `self.max_notes` entries, each entry is converted with
        `self.get_sent_vector_from_doc`, and the results are flattened.

        Args:
            empi: patient identifier handed to `loader.get_patient_by_EMPI`.

        Returns:
            1-D numpy array holding the per-note vectors back to back.
        """
        patient = loader.get_patient_by_EMPI(empi)
        operation_date = extract_data.get_operation_date(patient)
        date_key = extract_data.get_date_key(self.note_type)

        notes = []
        if date_key is not None and self.note_type in patient.keys():
            docs = patient[self.note_type]
            # (time before operation, index) pairs: sorting them puts the note
            # closest to the operation first, ties resolved by original order.
            time_idx_pairs = []
            for idx, doc in enumerate(docs):
                date = extract_data.parse_date(doc[date_key])
                if date is not None and date < operation_date:
                    time_idx_pairs.append((operation_date - date, idx))
            time_idx_pairs.sort()
            notes = [docs[idx]['free_text'] for _, idx in time_idx_pairs[:self.max_notes]]

        # ensure that notes vector length is equal to max_notes
        if len(notes) < self.max_notes:
            notes.extend([''] * (self.max_notes - len(notes)))

        # Turn notes into Doc Vectors.  A list is built explicitly because
        # np.array needs a real sequence, not a lazy map object.
        vectors = [self.get_sent_vector_from_doc(note) for note in notes]
        return np.array(vectors).flatten()
Пример #5
0
def get_diagnoses(empi):
    """Given an empi, return the diagnosis timeline T for that patient.
    T is a list of tuples of the form (diagnosis date, Code_Type, Code, Diagnosis_Name),
    sorted as tuples, i.e. by date first.  A given date may carry several diagnoses, and
    the same diagnosis can be reported again on every visit.  T is empty when the
    patient record has no 'Dia' entry."""
    patient = loader.get_patient_by_EMPI(empi)
    if 'Dia' not in patient.keys():
        return []
    timeline = [
        (extract_data.parse_date(entry['Date']), entry['Code_Type'], entry['Code'], entry['Diagnosis_Name'])
        for entry in patient['Dia']
    ]
    timeline.sort()
    return timeline
Пример #6
0
 def get_concatenated_notes(self, empi):
     """Join the text of a patient's pre-operation notes of type `self.type`.

     A note is kept when its date parses and is strictly before the operation
     date.  When `self.look_back_months` is set (truthy), notes older than that
     many months (a month being 365/12 days) before the operation are dropped.
     The kept texts are returned in record order, separated by a blank line;
     the result is an empty string when nothing qualifies.
     """
     person = loader.get_patient_by_EMPI(empi)
     operation_date = build_graphs.get_operation_date(person)
     date_key = extract_data.get_date_key(self.type)
     seconds_per_month = 24 * 60 * 60 * (365.0 / 12)
     selected = []
     if date_key is not None and self.type in person.keys():
         for doc in person[self.type]:
             note_date = extract_data.parse_date(doc[date_key])
             if note_date is None or not note_date < operation_date:
                 continue
             if self.look_back_months:
                 age_seconds = (operation_date - note_date).total_seconds()
                 if age_seconds > self.look_back_months * seconds_per_month:
                     continue
             selected.append(doc['free_text'])
     return '\n\n'.join(selected)
Пример #7
0
def get_lab_history_before_date(empi, date, time_thresholds_months):
    """Summarise, per lab group, whether results ran high or low before `date`.

    The time before `date` is split into windows whose upper bounds, in months
    before `date`, are the entries of `time_thresholds_months` (presumably
    given in ascending order; verify against callers).  Each lab is counted in
    the first window whose bound its age does not exceed; labs older than the
    last bound are ignored.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.
        date: cutoff; only labs strictly before it are considered.
        time_thresholds_months: sequence of window bounds in months.

    Returns:
        dict mapping a lab 'Group_Id' to a list with one entry per window:
        'H' or 'L' when that abnormal flag was the most frequent one in the
        window, None when unflagged results were the most frequent or when the
        window holds no results at all.  Ties are resolved in the order
        'H', 'L', None.  Only groups with at least one lab inside some window
        appear.  For example 'BUN' => ['H', None, 'L'] indicates a transition
        from low (L) to high (H) leading up to `date`.
    """
    p = loader.get_patient_by_EMPI(empi)
    values = ['H', 'L', None]
    # One twelfth of a 365-day year, in seconds (divides exactly: 2628000).
    seconds_in_month = 365 * 24 * 60 * 60 // 12
    n_windows = len(time_thresholds_months)
    # Group id -> 2-D count array: first dimension = time window, second
    # dimension = counts of 'H', 'L' and None (same order as `values`).
    # E.g. a row [15, 1, 2] means 'H' was seen most (15 times) in that window.
    lab_history_counts = {}
    if 'Lab' in p.keys():
        for lab in p['Lab']:
            raw_date = lab['Seq_Date_Time']
            if not raw_date:
                continue
            # Parse once and reuse.  Assumes parse_date can yield None for a
            # timestamp it cannot read (TODO confirm); such labs are skipped
            # instead of failing on the date comparison.
            lab_date = extract_data.parse_date(raw_date)
            if lab_date is None or lab_date >= date:
                continue
            flag = lab['Abnormal_Flag']
            value_index = values.index(flag if flag in ('H', 'L') else None)
            age_seconds = (date - lab_date).total_seconds()
            time_index = 0
            while time_index < n_windows and age_seconds > time_thresholds_months[time_index] * seconds_in_month:
                time_index += 1
            if time_index >= n_windows:
                continue  # older than the last window
            group_id = lab['Group_Id']
            if group_id not in lab_history_counts:
                lab_history_counts[group_id] = np.zeros([n_windows, len(values)])
            lab_history_counts[group_id][time_index][value_index] += 1
    lab_history = {}
    for lab_name, counts in lab_history_counts.items():
        # argmax of an all-zero row is 0, which would read as 'H'; a window
        # without any result must be reported as None instead.
        lab_history[lab_name] = [
            values[row.argmax()] if row.any() else None for row in counts
        ]
    return lab_history
Пример #8
0
def get_labs_before_date(empi, date):
    """Given an empi and a date, return the labs for that patient before that date.

    Args:
        empi: patient identifier handed to `loader.get_patient_by_EMPI`.
        date: cutoff; only labs whose parsed 'Seq_Date_Time' is strictly
            earlier than this value are considered.

    Returns:
        Four dictionaries keyed by lab 'Group_Id', in this order:
        total counts, low counts ('Abnormal_Flag' == 'L'), high counts
        ('Abnormal_Flag' == 'H'), and the latest `(lab_date, Abnormal_Flag)`
        tuple for that test.  A group appears in the low/high dictionaries
        only if it has at least one such result.
    """
    p = loader.get_patient_by_EMPI(empi)
    lab_counts = {}
    lab_lows = {}
    lab_highs = {}
    lab_latest = {}
    if 'Lab' not in p.keys():
        return lab_counts, lab_lows, lab_highs, lab_latest
    for lab in p['Lab']:
        raw_date = lab['Seq_Date_Time']
        if not raw_date:
            continue
        # Parse once and reuse.  Assumes parse_date can yield None for a
        # timestamp it cannot read (TODO confirm); such labs are skipped
        # instead of failing on the date comparison.
        lab_date = extract_data.parse_date(raw_date)
        if lab_date is None or lab_date >= date:
            continue
        group_id = lab['Group_Id']
        flag = lab['Abnormal_Flag']
        lab_counts[group_id] = lab_counts.get(group_id, 0) + 1
        # Keep the most recent test value; on equal dates the first one seen stays.
        if group_id not in lab_latest or lab_date > lab_latest[group_id][0]:
            lab_latest[group_id] = (lab_date, flag)
        if flag == 'L':
            lab_lows[group_id] = lab_lows.get(group_id, 0) + 1
        elif flag == 'H':
            lab_highs[group_id] = lab_highs.get(group_id, 0) + 1
    return lab_counts, lab_lows, lab_highs, lab_latest
Пример #9
0
def get_encounters(empi):
    """Given an empi, return that patient's encounters ordered by Admit Date
    (Discharge Date is not always recorded, so it is not used for ordering).

    Each encounter is a tuple (admit date, Inpatient_Outpatient as str,
    discharge date, LOS_Days as int or 0 when blank, number of non-empty
    Diagnosis_1..Diagnosis_9 fields).  Encounters without an Admit_Date are
    left out; the list is empty when the patient record has no 'Enc' entry."""
    patient = loader.get_patient_by_EMPI(empi)
    encounters = []
    if 'Enc' not in patient.keys():
        return encounters
    for enc in patient['Enc']:
        # Counted before the Admit_Date check, so every encounter's
        # Diagnosis_* fields are read even if the encounter is then dropped.
        filled_diagnoses = sum(1 for n in range(1, 10) if enc['Diagnosis_' + str(n)])
        if not enc['Admit_Date']:
            continue
        admitted = extract_data.parse_date(enc['Admit_Date'])
        care_setting = str(enc['Inpatient_Outpatient'])
        discharged = extract_data.parse_date(enc['Discharge_Date'])
        los_days = int(enc['LOS_Days']) if enc['LOS_Days'] else 0
        encounters.append((admitted, care_setting, discharged, los_days, filled_diagnoses))
    encounters.sort(key=lambda record: record[0])  # just sort on Admit_Date
    return encounters
    def select_doc(self, doc, operation_date, doc_type):
        """
        description: function that tells whether a specific doc should be used,
            judged by its date relative to the procedure
        inputs: dict of the doc, datetime of the procedure, string of doc type
        output: boolean - False when the doc date cannot be parsed or its day
            offset from the procedure is positive; otherwise True, provided the
            offset lies within self.time_horizon days when a horizon is set
        """

        date_key = extract_data.get_date_key(doc_type)
        doc_date = extract_data.parse_date(doc[date_key])
        if doc_date is None:
            return False
        day_offset = (doc_date - operation_date).days
        horizon = self.time_horizon
        if day_offset > 0:
            return False
        if horizon is None:
            return True
        return abs(day_offset) <= abs(horizon)
Пример #11
0
    def select_doc(self, doc, operation_date, doc_type):
        """
        description: function that tells whether a specific doc should be used,
            judged by its date relative to the procedure
        inputs: dict of the doc, datetime of the procedure, string of doc type
        output: boolean - False when the doc date cannot be parsed or its day
            offset from the procedure is positive; otherwise True, provided the
            offset lies within self.time_horizon days when a horizon is set
        """

        parsed_date = extract_data.parse_date(doc[extract_data.get_date_key(doc_type)])
        if parsed_date is None:
            return False
        days_from_operation = (parsed_date - operation_date).days
        horizon = self.time_horizon
        not_after_operation = days_from_operation <= 0
        if horizon is None:
            return not_after_operation
        return not_after_operation and abs(days_from_operation) <= abs(horizon)
Пример #12
0
    def parse_value(self, doc, operation_date, doc_type):
        """
        description: function that returns the desired value from a document
        inputs: dict of the doc, datetime of the procedure, string of doc type
        output: None when the doc date cannot be parsed; otherwise it depends
            on self.method, with delta_days = (doc date - procedure date).days:
              'other' -> (delta_days, list of all pattern matches)
              'found' -> (delta_days, 1)
              'count' -> [(delta_days, number of matches)] (a one-element list)
              anything else -> (delta_days, mean of the numeric matches), or
                  None when no match is numeric
        """

        note = doc['free_text'].lower()

        doc_date_text = doc[extract_data.get_date_key(doc_type)]
        doc_date = extract_data.parse_date(doc_date_text)
        if doc_date is None:
            # No usable date: the offset to the procedure cannot be computed.
            return None
        delta_days = (doc_date - operation_date).days

        # Collect every non-empty match, ignoring a bare "." or " ".
        values = []
        for pattern in self.patterns:
            values += [
                x for x in re.findall(pattern, note)
                if len(x) > 0 and x not in (".", " ")
            ]

        if self.method == 'other':  # returns entire value list
            return (delta_days, values)
        if self.method == 'found':
            # NOTE(review): reports 1 even when `values` is empty - confirm
            # that callers only use this after a successful match.
            return (delta_days, 1)
        if self.method == 'count':
            return [(delta_days, len(values))]

        # Numeric methods: average the matches that look like numbers.
        # NOTE(review): isnumeric() is False for strings containing '.' or
        # '-', so matches such as "3.5" are dropped - confirm this is intended.
        numbers = [float(x) for x in values if unicode(x).isnumeric()]
        if not numbers:
            return None
        return (delta_days, sum(numbers) / len(numbers))
Пример #13
0
 def get_latest_concatenated_notes(self, empi):
     """Return the texts of a patient's latest pre-operation notes as an array.

     Notes of type `self.type` whose date parses and is strictly before the
     operation date are ordered from closest to the operation to furthest,
     and the first `self.max_notes` texts are kept.  The list is padded with
     empty strings up to `self.max_notes` entries.  Despite the method name
     the texts are not joined: a numpy array with one text per entry is
     returned.
     """
     person = loader.get_patient_by_EMPI(empi)
     operation_date = build_graphs.get_operation_date(person)
     date_key = extract_data.get_date_key(self.type)
     notes = []
     if date_key is not None and self.type in person.keys():
         docs = person[self.type]
         gap_index_pairs = []
         for index, doc in enumerate(docs):
             note_date = extract_data.parse_date(doc[date_key])
             if note_date is None or not note_date < operation_date:
                 continue
             gap_index_pairs.append((operation_date - note_date, index))
         # Smallest gap to the operation first; ties keep record order.
         gap_index_pairs.sort()
         notes = [docs[index]['free_text'] for _, index in gap_index_pairs[:self.max_notes]]
     # ensure that notes vector length is equal to max_notes
     if len(notes) < self.max_notes:
         notes.extend([''] * (self.max_notes - len(notes)))
     return np.array(notes)
    def parse_value(self, doc, operation_date, doc_type):
        """
        description: function that returns the desired value from a document
        inputs: dict of the doc, datetime of the procedure, string of doc type
        output: None when the doc date cannot be parsed; otherwise it depends
            on self.method, with delta_days = (doc date - procedure date).days:
              'other' -> (delta_days, list of all pattern matches)
              'found' -> (delta_days, 1)
              'count' -> [(delta_days, number of matches)] (a one-element list)
              anything else -> (delta_days, mean of the numeric matches), or
                  None when no match is numeric
        """

        note = doc['free_text'].lower()

        doc_date_text = doc[extract_data.get_date_key(doc_type)]
        doc_date = extract_data.parse_date(doc_date_text)
        if doc_date is None:
            # No usable date: the offset to the procedure cannot be computed.
            return None
        delta_days = (doc_date - operation_date).days

        # Collect every non-empty match, ignoring a bare "." or " ".
        values = []
        for pattern in self.patterns:
            values += [
                x for x in re.findall(pattern, note)
                if len(x) > 0 and x not in (".", " ")
            ]

        if self.method == 'other':  # returns entire value list
            return (delta_days, values)
        if self.method == 'found':
            # NOTE(review): reports 1 even when `values` is empty - confirm
            # that callers only use this after a successful match.
            return (delta_days, 1)
        if self.method == 'count':
            return [(delta_days, len(values))]

        # Numeric methods: average the matches that look like numbers.
        # NOTE(review): isnumeric() is False for strings containing '.' or
        # '-', so matches such as "3.5" are dropped - confirm this is intended.
        numbers = [float(x) for x in values if unicode(x).isnumeric()]
        if not numbers:
            return None
        return (delta_days, sum(numbers) / len(numbers))