示例#1
0
    def _add_features(self):
        # Add lab panel order features.
        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':
            self._factory.addClinicalItemFeatures([self._lab_var],
                                                  features="pre",
                                                  isLabPanel=self._isLabPanel)
        else:
            # TODO: naming
            self._factory.addClinicalItemFeatures_UMich(
                [self._lab_var],
                features="pre",
                clinicalItemType=self._varTypeInTable,
                clinicalItemTime='order_time',
                tableName='labs')  #sx

        # Add lab component result features, for a variety of time deltas.
        LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)]
        LAB_POST_TIME_DELTA = datetime.timedelta(0)
        log.info('Adding lab component features...')
        for pre_time_delta in LAB_PRE_TIME_DELTAS:
            log.info('\t%s' % pre_time_delta)
            self._factory.addLabResultFeatures(self._lab_components, False,
                                               pre_time_delta,
                                               LAB_POST_TIME_DELTA)

        FeatureMatrix._add_features(self, index_time_col='order_time')
示例#2
0
    def __init__(self, lab_panel, num_episodes, random_state=None):
        FeatureMatrix.__init__(self, lab_panel, num_episodes)

        # Parse arguments.
        self._lab_panel = lab_panel
        self._num_requested_episodes = num_episodes
        self._num_reported_episodes = 0
        if random_state:
            query = SQLQuery()
            query.addSelect('setseed(%d);' % random_state)
            DBUtil.execute(query)
            self._random_state = random_state

        # Query patient episodes.
        self._query_patient_episodes()
        episodes = self._factory.getPatientEpisodeIterator()
        patients = set()
        for episode in episodes:
            patient_id = int(episode[self._factory.patientEpisodeIdColumn])
            patients.add(patient_id)
        self._num_patients = len(patients)

        # Add features.
        self._add_features()

        # Build matrix.
        FeatureMatrix._build_matrix(self)
示例#3
0
    def _add_features(self):
        # # Add past susceptibility readings
        self._add_susc_features()

        # # Add past antibiotic use as features
        self._add_med_features()

        # # Add lab panel order features.
        for panel in self._lab_features:
            self._factory.addClinicalItemFeatures([panel], features="pre")
        FeatureMatrix._add_features(self, index_time_col='shifted_order_time')
示例#4
0
    def _add_features(self):
        # Add lab panel order features.
        self._factory.addClinicalItemFeatures([self._lab_panel], features="pre")

        # Add lab component result features, for a variety of time deltas.
        LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)]
        LAB_POST_TIME_DELTA = datetime.timedelta(0)
        log.info('Adding lab component features...')
        for pre_time_delta in LAB_PRE_TIME_DELTAS:
            log.info('\t%s' % pre_time_delta)
            self._factory.addLabResultFeatures(self._lab_components, False, pre_time_delta, LAB_POST_TIME_DELTA)

        FeatureMatrix._add_features(self, index_time_col='order_time')
示例#5
0
    def __init__(self,
                 lab_var,
                 num_episodes,
                 random_state=None,
                 isLabPanel=True,
                 timeLimit=None,
                 notUsePatIds=None):
        FeatureMatrix.__init__(self, lab_var, num_episodes)

        self._isLabPanel = isLabPanel
        if isLabPanel:
            self._varTypeInTable = 'proc_code'
            self._ylabel = 'all_components_normal'
        else:
            self._varTypeInTable = 'base_name'
            self._ylabel = 'component_normal'

        # Parse arguments.
        self._lab_var = lab_var
        self._num_requested_episodes = num_episodes
        self._num_reported_episodes = 0
        # SQLite's random() function does not support a seed value.
        if random_state:
            self._random_state = random_state
            if LocalEnv.DATABASE_CONNECTOR_NAME == 'psycopg2':
                query = SQLQuery()
                query.addSelect('setseed(%d);' % random_state)
                DBUtil.execute(query)

        self._time_limit = timeLimit

        if notUsePatIds:
            self._notUsePatIds = list(notUsePatIds)
        else:
            self._notUsePatIds = []

        # Query patient episodes.
        self._query_patient_episodes()
        episodes = self._factory.getPatientEpisodeIterator()
        patients = set()
        for episode in episodes:
            patient_id = int(episode[self._factory.patientEpisodeIdColumn])
            patients.add(patient_id)
        self._num_patients = len(patients)

        # Add features.
        self._add_features()

        # Build matrix.
        FeatureMatrix._build_matrix(self)
示例#6
0
    def __init__(self,
                 lab_panel,
                 lab_features,
                 num_episodes,
                 random_state=None):
        FeatureMatrix.__init__(self, lab_panel, num_episodes)

        # Parse arguments.
        self._lab_panel = lab_panel
        self._lab_features = lab_features

        self._med_panel = [
            ['Cefepime (Oral)', 'Cefepime (Intravenous)'],
            ['Cefazolin (Oral)', 'Cefazolin (Intravenous)'],
            ['Ceftriaxone (Oral)', 'Ceftriaxone (Intravenous)'],
            ['Meropenem (Oral)', 'Meropenem (Intravenous)'],
            ['Vancomycin (Oral)', 'Vancomycin (Intravenous)'],
            ['Linezolid (Oral)', 'Linezolid (Intravenous)'],
            ['Daptomycin (Oral)', 'Daptomycin (Intravenous)'],
            ['Levofloxacin (Oral)', 'Levofloxacin (Intravenous)'],
            ['Ciprofloxacin (Oral)', 'Ciprofloxacin (Intravenous)'],
            ['Ampicillin (Oral)', 'Ampicillin (Intravenous)'],
            ['Metronidazole (Oral)', 'Metronidazole (Intravenous)'],
            ['Caspofungin (Oral)', 'Caspofungin (Intravenous)']
        ]
        # susceptibility_df = pd.read_csv('/Users/conorcorbin/repos/CDSS/scripts/LabCulturePrediction/Susceptibility_Feature_Names.csv')
        susceptibility_df = pd.read_csv(
            '/home/ec2-user/CDSS/scripts/LabCulturePrediction/Susceptibility_Feature_Names.csv'
        )
        self._susceptibility_names = susceptibility_df['name'].values
        self._num_requested_episodes = num_episodes
        self._num_reported_episodes = 0

        # Query patient episodes.
        self._query_patient_episodes()
        episodes = self._factory.getPatientEpisodeIterator()
        patients = set()
        for episode in episodes:
            patient_id = int(episode[self._factory.patientEpisodeIdColumn])
            patients.add(patient_id)
        self._num_patients = len(patients)

        # Add features.
        self._add_features()

        # Build matrix.
        FeatureMatrix._build_matrix(self)
示例#7
0
    def _build_matrix_header(self, matrix_path):
        params = {}

        params['matrix_path'] = matrix_path
        params['matrix_module'] = inspect.getfile(inspect.currentframe())
        params['data_overview'] = self._build_data_overview()
        params['field_summary'] = self._build_field_summary()
        params['include_lab_suffix_summary'] = True
        params['include_clinical_item_suffix_summary'] = True

        return FeatureMatrix._build_matrix_header(self, params)
示例#8
0
    def _add_features(self):
        # Add past susceptibility readings
        self._add_susc_features()

        # Add past antibiotic use as features
        self._add_med_features()

        # Add lab panel order features.
        self._factory.addClinicalItemFeatures(self._lab_panel, features="pre")

        # Add lab component result features, for a variety of time deltas.
        LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)]
        LAB_POST_TIME_DELTA = datetime.timedelta(0)
        log.info('Adding lab component features...')
        for pre_time_delta in LAB_PRE_TIME_DELTAS:
            log.info('\t%s' % pre_time_delta)
            self._factory.addLabResultFeatures(self._lab_components, False,
                                               pre_time_delta,
                                               LAB_POST_TIME_DELTA)

        FeatureMatrix._add_features(self, index_time_col='shifted_order_time')
示例#9
0
    def __init__(self, lab_panel, num_episodes):
        FeatureMatrix.__init__(self, lab_panel, num_episodes)

        # Parse arguments.
        self._lab_panel = lab_panel
        self._num_requested_episodes = num_episodes
        self._num_reported_episodes = 0

        # Query patient episodes.
        self._query_patient_episodes()
        episodes = self._factory.getPatientEpisodeIterator()
        patients = set()
        for episode in episodes:
            patient_id = int(episode[self._factory.patientEpisodeIdColumn])
            patients.add(patient_id)
        self._num_patients = len(patients)

        # Add features.
        self._add_features()

        # Build matrix.
        FeatureMatrix._build_matrix(self)
示例#10
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()
        self.usedPatIds = random_patient_list[:]

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.

        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':  # TODO: component
            query = SQLQuery()
            query.addSelect('CAST(pat_id AS BIGINT)')
            query.addSelect('sop.order_proc_id AS order_proc_id')
            query.addSelect('proc_code')  #TODO:sx
            query.addSelect('order_time')
            query.addSelect(
                "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel"
            )  #sx
            query.addSelect(
                "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components"
            )  #sx
            query.addSelect(
                "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components"
            )  #sx
            query.addSelect(
                "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal"
            )  #sx
            query.addFrom('stride_order_proc AS sop')  #sx
            query.addFrom('stride_order_results AS sor')  #sx
            query.addWhere('sop.order_proc_id = sor.order_proc_id')  #sx
            query.addWhere(
                "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)"
            )
            query.addWhereIn("proc_code", [self._lab_var])  #sx
            query.addWhereIn("pat_id", random_patient_list)
            query.addGroupBy('pat_id')
            query.addGroupBy('sop.order_proc_id')  #sx
            query.addGroupBy('proc_code')  #sx
            query.addGroupBy('order_time')
            query.addGroupBy('abnormal_yn')  #sx
            query.addOrderBy('pat_id')
            query.addOrderBy('sop.order_proc_id')  #sx
            query.addOrderBy('proc_code')  #sx
            query.addOrderBy('order_time')
            query.setLimit(self._num_requested_episodes)

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query, index_time_col='order_time')

        elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
            # query = SQLQuery()
            # query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            # query.addSelect('order_proc_id')
            # query.addSelect(self._varTypeInTable)
            # query.addSelect('order_time')
            #
            # query.addWhereIn(self._varTypeInTable, [self._lab_var])
            #
            # if self._isLabPanel:
            #
            #     query.addSelect("SUM(CASE WHEN result_in_range_yn IN ('N', 'Y') THEN 1 ELSE 0 END) AS num_components")
            #     query.addSelect("SUM(CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END) AS num_normal_components")
            #     query.addSelect("CAST(SUM(CASE WHEN result_in_range_yn = 'N' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal") #TODO
            # else:
            #     query.addSelect("CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal")
            #
            # query.addFrom('labs')
            # query.addWhereIn("pat_id", random_patient_list)
            #
            # query.addGroupBy('pat_id')
            # query.addGroupBy('order_proc_id')
            # query.addGroupBy(self._varTypeInTable)
            # query.addGroupBy('order_time')
            #
            # query.addOrderBy('pat_id')
            # query.addOrderBy('order_proc_id')
            # query.addOrderBy(self._varTypeInTable)
            # query.addOrderBy('order_time')
            #
            # query.setLimit(self._num_requested_episodes)
            #
            # print query

            query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
            if not self._isLabPanel:
                query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
            else:
                query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
            # query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
            query_str += "FROM labs "
            query_str += "WHERE %s = '%s' " % (self._varTypeInTable,
                                               self._lab_var)
            query_str += "AND pat_id IN "
            pat_list_str = "("
            for pat_id in random_patient_list:
                pat_list_str += str(pat_id) + ","
            pat_list_str = pat_list_str[:-1] + ") "
            query_str += pat_list_str
            query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "LIMIT %d" % self._num_requested_episodes

            self._num_reported_episodes = FeatureMatrix._querystr_patient_episodes(
                self, query_str, index_time_col='order_time')
示例#11
0
 def write_matrix(self, dest_path):
     log.info('Writing %s...' % dest_path)
     header = self._build_matrix_header(dest_path)
     FeatureMatrix.write_matrix(self, dest_path, header)
示例#12
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.

        query = """SELECT
                    pat_anon_id as pat_id,
                    pat_enc_csn_anon_id,
                    shifted_order_time,
                    order_proc_anon_id,
                    organism_name,
                    proc_code,
                    
                    MAX(CASE WHEN antibiotic_name = 'Ampicillin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ampicillin_susc,
                    MAX(CASE WHEN antibiotic_name = 'Ampicillin/Sulbactam' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ampicillin_sulbactam_susc,
                    MAX(CASE WHEN antibiotic_name = 'Piperacillin/Tazobactam' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as piperacillin_tazobactam_susc,

                    MAX(CASE WHEN antibiotic_name in ('Cefazolin', 'Cefazolin..') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as cefazolin_susc,
                    MAX(CASE WHEN antibiotic_name in ('Ceftriaxone', 'Ceftriaxone.') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ceftriaxone_susc,
                    MAX(CASE WHEN antibiotic_name = 'Cefepime' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as cefepime_susc,

                    MAX(CASE WHEN antibiotic_name in ('Aztreonam', 'Aztreonam.') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as aztreonam_susc,
                    MAX(CASE WHEN antibiotic_name = 'Imipenem' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as imipenem_susc,
                    MAX(CASE WHEN antibiotic_name = 'Meropenem' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as meropenem_susc,

                    MAX(CASE WHEN antibiotic_name = 'Gentamicin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as gentamicin_susc,
                    MAX(CASE WHEN antibiotic_name = 'Tobramycin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as tobramycin_susc,
                    MAX(CASE WHEN antibiotic_name = 'Amikacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as amikacin_susc,

                    MAX(CASE WHEN antibiotic_name = 'Ciprofloxacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ciprofloxacin_susc,
                    MAX(CASE WHEN antibiotic_name = 'Levofloxacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as levofloxacin_susc,
                    MAX(CASE WHEN antibiotic_name = 'Trimethoprim/Sulfamethoxazole.' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as trimethoprim_sulfamethoxazole_susc,

                    MAX(CASE WHEN antibiotic_name = 'Nitrofurantoin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as nitrofurantoin_susc,

                    MAX(CASE WHEN antibiotic_name = 'Ampicillin' then 1 else 0 end) as ampicillin_tested,
                    MAX(CASE WHEN antibiotic_name = 'Ampicillin/Sulbactam' then 1 else 0 end) as ampicillin_sulbactam_tested,
                    MAX(CASE WHEN antibiotic_name = 'Piperacillin/Tazobactam' then 1 else 0 end) as piperacillin_tazobactam_tested,

                    MAX(CASE WHEN antibiotic_name in ('Cefazolin', 'Cefazolin..') then 1 else 0 end) as cefazolin_tested,
                    MAX(CASE WHEN antibiotic_name in ('Ceftriaxone', 'Ceftriaxone.') then 1 else 0 end) as ceftriaxone_tested,
                    MAX(CASE WHEN antibiotic_name = 'Cefepime' then 1 else 0 end) as cefepime_tested,

                    MAX(CASE WHEN antibiotic_name in ('Aztreonam', 'Aztreonam.') then 1 else 0 end) as aztreonam_tested,
                    MAX(CASE WHEN antibiotic_name = 'Imipenem' then 1 else 0 end) as imipenem_tested,
                    MAX(CASE WHEN antibiotic_name = 'Meropenem' then 1 else 0 end) as meropenem_tested,

                    MAX(CASE WHEN antibiotic_name = 'Gentamicin' then 1 else 0 end) as gentamicin_tested,
                    MAX(CASE WHEN antibiotic_name = 'Tobramycin' then 1 else 0 end) as tobramycin_tested,
                    MAX(CASE WHEN antibiotic_name = 'Amikacin' then 1 else 0 end) as amikacin_tested,

                    MAX(CASE WHEN antibiotic_name = 'Ciprofloxacin' then 1 else 0 end) as ciprofloxacin_tested,
                    MAX(CASE WHEN antibiotic_name = 'Levofloxacin' then 1 else 0 end) as levofloxacin_tested,
                    MAX(CASE WHEN antibiotic_name = 'Trimethoprim/Sulfamethoxazole.' then 1 else 0 end) as trimethoprim_sulfamethoxazole_tested,

                    MAX(CASE WHEN antibiotic_name = 'Nitrofurantoin' then 1 else 0 end) as nitrofurantoin_tested
                FROM 
                    stride_culture_micro
                WHERE
                    antibiotic_name in ('Ampicillin', 'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam', 'Cefazolin', 'Cefazolin..',
                                       'Ceftriaxone', 'Ceftriaxone.', 'Cefepime', 'Aztreonam', 'Aztreonam.', 'Imipenem', 'Meropenem',
                                       'Gentamicin', 'Tobramycin', 'Amikacin', 'Ciprofloxacin', 'Levofloxacin', 'Trimethoprim/Sulfamethoxazole.',
                                       'Nitrofurantoin')
                    AND
                    organism_name in ('ESCHERICHIA COLI', 'KLEBSIELLA PNEUMONIAE', 'PSEUDOMONAS AERUGINOSA', 'PROTEUS MIRABILIS')
                GROUP BY pat_id, pat_enc_csn_anon_id, shifted_order_time, order_proc_anon_id, organism_name, proc_code
                ORDER BY pat_id, pat_enc_csn_anon_id, shifted_order_time
                """
        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self,
            query,
            pat_id_col='pat_id',
            index_time_col='shifted_order_time')
示例#13
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.

        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':

            query = SQLQuery()
            '''
            pat_id: hashed patient id
            '''
            query.addSelect('CAST(pat_id AS BIGINT) as pat_id')
            '''
            order_proc_id: unique identifier for an episode
            '''
            query.addSelect('sop.order_proc_id')
            '''
            self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried
            '''
            query.addSelect(self._varTypeInTable)
            '''
            order_time: The time of the order. Note that sor table does not have this info. 
            '''
            query.addSelect('order_time')
            '''
            y-labels related columns, choose one to predict (for now, use all_components_normal to predict). 
            '''
            if self._isLabPanel:
                query.addSelect(
                    "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel"
                )  #
                query.addSelect(
                    "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components"
                )  # sx
                query.addSelect(
                    "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components"
                )  # sx
                query.addSelect(
                    "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal"
                )  # sx
            else:
                query.addSelect(
                    "CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal"
                )
            '''
            Relevant tables. Note that sor table does not have patient_id info; need to join sop to obtain it.  
            '''
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            '''
            Condition: self._time_limit[0] < order_time < self._time_limit[1]
            '''
            if self._time_limit:
                if self._time_limit[0]:
                    query.addWhere("sop.order_time > '%s'" %
                                   self._time_limit[0])
                if self._time_limit[1]:
                    query.addWhere("sop.order_time < '%s'" %
                                   self._time_limit[1])

            query.addWhere(
                "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)"
            )
            query.addWhereIn(self._varTypeInTable, [self._lab_var])  # sx
            query.addWhereIn("pat_id", random_patient_list)

            query.addGroupBy('pat_id')
            query.addGroupBy('sop.order_proc_id')
            query.addGroupBy(self._varTypeInTable)
            query.addGroupBy('order_time')
            if not self._isLabPanel:
                query.addGroupBy('result_flag')
            query.addGroupBy('abnormal_yn')  #

            query.addOrderBy('pat_id')
            query.addOrderBy('sop.order_proc_id')
            query.addOrderBy(self._varTypeInTable)
            query.addOrderBy('order_time')

            query.setLimit(self._num_requested_episodes)

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query, index_time_col='order_time')

        else:
            '''
            Sqlite3 has an interesting limit for the total number of place_holders in a query, 
            and this limit varies across platforms/operating systems (500-99999 on mac, 999 by defaulty). 
            
            To avoid this problem when querying 1000-10000 patient ids, use string queries instead of the
            default (convenient) routine in DBUtil.  
            '''

            query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
            if not self._isLabPanel:
                query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
            else:
                query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
            query_str += "FROM labs "
            query_str += "WHERE %s = '%s' " % (self._varTypeInTable,
                                               self._lab_var)
            query_str += "AND pat_id IN "
            pat_list_str = "("
            for pat_id in random_patient_list:
                pat_list_str += str(pat_id) + ","
            pat_list_str = pat_list_str[:-1] + ") "
            query_str += pat_list_str
            query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "LIMIT %d" % self._num_requested_episodes

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query_str, index_time_col='order_time')
示例#14
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        #print("RANDOM PATIENT LIST", random_patient_list)

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.
        query = SQLQuery()
        query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
        query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
        query.addSelect('stride_culture_micro.proc_code')
        query.addSelect('organism_name')  #one for the result

        # Let us look at top 10 commonly occuring bacteria
        query.addSelect(
            "CASE WHEN organism_name IS NULL THEN 1 ELSE 0 END AS NO_BACTERIA")
        query.addSelect(
            "CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS"
        )

        query.addFrom('stride_culture_micro')

        query.addWhereIn("stride_culture_micro.proc_code", [self._lab_panel])
        query.addWhereIn("pat_id", random_patient_list)
        query.addGroupBy('pat_id')
        query.addGroupBy('shifted_order_time')
        query.addGroupBy('stride_culture_micro.proc_code')
        query.addGroupBy('organism_name')
        query.addOrderBy('pat_id')
        query.addOrderBy('shifted_order_time')
        query.addOrderBy('stride_culture_micro.proc_code')
        query.addOrderBy('organism_name')
        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self,
            query,
            pat_id_col='pat_id',
            index_time_col='shifted_order_time')
示例#15
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        #print("RANDOM PATIENT LIST", random_patient_list)

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.
        query = SQLQuery()
        query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id')
        query.addSelect('CAST(shifted_order_time AS TIMESTAMP)')
        query.addSelect('stride_culture_micro.proc_code')
        query.addSelect('organism_name')  #one for the result

        # Experimenting
        susceptibility_flags = [
            'Trimethoprim/Sulfamethoxazole', 'Vancomycin', 'Penicillin',
            'Levofloxacin', 'Clindamycin', 'Ceftriaxone', 'Erythromycin',
            'Ampicillin', 'Meropenem', 'Ciprofloxacin', 'Cefepime',
            'Aztreonam.', 'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam',
            'Linezolid', 'Oxacillin.', 'Cefazolin', 'Daptomycin'
        ]

        for med in susceptibility_flags:
            med_col = med.replace('/', '_').replace('.', '')
            query.addSelect(
                "MAX(CASE WHEN antibiotic_name = '%s' AND (suseptibility = 'Susceptible' OR suseptibility = 'Positive') THEN 1 ELSE 0 END) AS %s_Susc"
                % (med, med_col))
            query.addSelect(
                "MAX(CASE WHEN antibiotic_name = '%s' THEN 1 ELSE 0 END) as %s_tested"
                % (med, med_col))

        # Let us look at top 10 commonly occuring bacteria
        query.addSelect(
            "CASE WHEN organism_name IS NULL THEN 0 ELSE 1 END AS BACTERIA_PRESENT"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS"
        )
        query.addSelect(
            "CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS"
        )

        query.addFrom('stride_culture_micro')

        query.addWhereIn("stride_culture_micro.proc_code", self._lab_panel)
        query.addWhereIn("pat_anon_id", random_patient_list)
        query.addGroupBy('pat_anon_id')
        query.addGroupBy('shifted_order_time')
        query.addGroupBy('stride_culture_micro.proc_code')
        query.addGroupBy('organism_name')
        query.addOrderBy('pat_anon_id')
        query.addOrderBy('shifted_order_time')
        query.addOrderBy('stride_culture_micro.proc_code')
        query.addOrderBy('organism_name')
        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self,
            query,
            pat_id_col='pat_id',
            index_time_col='shifted_order_time')
示例#16
0
    def _query_patient_episodes(self, pat_id_col="pat_deid"):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        print("RANDOM PATIENT LIST", random_patient_list)

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.
        query = SQLQuery()
        #query.addSelect('CAST(micro.pat_deid AS BIGINT)')
        query.addSelect('CAST(micro.order_time AS TIMESTAMP)')
        #query.addSelect('sop.order_proc_id AS order_proc_id')
        query.addSelect('proc_code')
        query.addSelect('order_time')
        query.addSelect('organism_name') #one for the result
        # query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")
        # query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")
        # query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")
        # query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")
        # query.addFrom('stride_order_proc AS sop')
        # query.addFrom('stride_order_results AS sor')
        query.addFrom('micro')
        #query.addWhere('sop.order_proc_id = sor.order_proc_id')
        #query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
        #query.addWhereIn("micro.proc_code", [self._lab_panel])
        query.addWhereIn("micro.pat_deid", random_patient_list)
        query.addGroupBy('micro.pat_deid')
        #query.addGroupBy('sop.order_proc_id')
        query.addGroupBy('proc_code')
        query.addGroupBy('order_time')
        query.addGroupBy('organism_name')
        #query.addGroupBy('abnormal_yn')
        query.addOrderBy('micro.pat_deid')
        #query.addOrderBy('sop.order_proc_id')
        query.addOrderBy('proc_code')
        query.addOrderBy('micro.order_time')
        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
示例#17
0
文件: MRSAMatrix.py 项目: xxxx3/CDSS
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.

        query = """SELECT DISTINCT ON(labels.pat_id, labels.pat_enc_csn_id)
                    labels.pat_id,
                    labels.pat_enc_csn_id,
                    labels.order_time,
                    labels.MRSA_PRESENT
                   FROM
                    (SELECT 
                        vanco_zosyn.pat_id,
                        vanco_zosyn.pat_enc_csn_id, 
                        vanco_zosyn.order_time,
                        MAX(CASE WHEN micro.organism_name = 'STAPHYLOCOCCUS AUREUS' AND 
                                      (micro.antibiotic_name = 'Oxacillin' OR
                                       micro.antibiotic_name = 'Oxacillin.') AND
                                      micro.suseptibility = 'Resistant' 
                                 THEN 1
                                 ELSE 0
                            END) as MRSA_PRESENT          
                    FROM
                      (SELECT 
                           pip_tazo.pat_id, 
                           pip_tazo.pat_enc_csn_id,
                           pip_start_time AS order_time 
                       FROM 
                         (SELECT 
                              pat_id, 
                              pat_enc_csn_id, 
                              start_taking_time AS pip_start_time,
                              end_taking_time AS pip_end_time,
                              description AS pip_description
                           FROM stride_order_med
                           WHERE description LIKE '%PIPERACILLIN-TAZOBACTAM%') pip_tazo
                       INNER JOIN
                          (SELECT 
                            pat_id, 
                            pat_enc_csn_id,
                            start_taking_time AS vanco_start_time,
                            end_taking_time AS vanco_end_time,
                            description AS vanco_desciption
                           FROM stride_order_med
                           WHERE description LIKE '%VANCOMYCIN%') vanco
                       ON pip_tazo.pat_enc_csn_id = vanco.pat_enc_csn_id AND pip_start_time = vanco_start_time) vanco_zosyn
                    INNER JOIN
                     (SELECT 
                          pat_anon_id,
                          pat_enc_csn_anon_id,
                          proc_code,
                          description,
                          organism_name,
                          antibiotic_name,
                          suseptibility,
                          shifted_order_time
                      FROM stride_culture_micro) micro
                   ON vanco_zosyn.pat_id = micro.pat_anon_id AND vanco_zosyn.pat_enc_csn_id = micro.pat_enc_csn_anon_id
                     WHERE 
                        DATE_PART('day', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) * 24 + DATE_PART('hour', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) >= 0 AND 
                        DATE_PART('day', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) * 24 + DATE_PART('hour', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) <= 24
                     GROUP BY pat_id, pat_enc_csn_id, order_time) as labels
                   ORDER BY pat_id, pat_enc_csn_id, order_time
                   """
        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
            self, query, pat_id_col='pat_id', index_time_col='order_time')