def _add_features(self): # Add lab panel order features. if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE': self._factory.addClinicalItemFeatures([self._lab_var], features="pre", isLabPanel=self._isLabPanel) else: # TODO: naming self._factory.addClinicalItemFeatures_UMich( [self._lab_var], features="pre", clinicalItemType=self._varTypeInTable, clinicalItemTime='order_time', tableName='labs') #sx # Add lab component result features, for a variety of time deltas. LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)] LAB_POST_TIME_DELTA = datetime.timedelta(0) log.info('Adding lab component features...') for pre_time_delta in LAB_PRE_TIME_DELTAS: log.info('\t%s' % pre_time_delta) self._factory.addLabResultFeatures(self._lab_components, False, pre_time_delta, LAB_POST_TIME_DELTA) FeatureMatrix._add_features(self, index_time_col='order_time')
def __init__(self, lab_panel, num_episodes, random_state=None): FeatureMatrix.__init__(self, lab_panel, num_episodes) # Parse arguments. self._lab_panel = lab_panel self._num_requested_episodes = num_episodes self._num_reported_episodes = 0 if random_state: query = SQLQuery() query.addSelect('setseed(%d);' % random_state) DBUtil.execute(query) self._random_state = random_state # Query patient episodes. self._query_patient_episodes() episodes = self._factory.getPatientEpisodeIterator() patients = set() for episode in episodes: patient_id = int(episode[self._factory.patientEpisodeIdColumn]) patients.add(patient_id) self._num_patients = len(patients) # Add features. self._add_features() # Build matrix. FeatureMatrix._build_matrix(self)
def _add_features(self): # # Add past susceptibility readings self._add_susc_features() # # Add past antibiotic use as features self._add_med_features() # # Add lab panel order features. for panel in self._lab_features: self._factory.addClinicalItemFeatures([panel], features="pre") FeatureMatrix._add_features(self, index_time_col='shifted_order_time')
def _add_features(self): # Add lab panel order features. self._factory.addClinicalItemFeatures([self._lab_panel], features="pre") # Add lab component result features, for a variety of time deltas. LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)] LAB_POST_TIME_DELTA = datetime.timedelta(0) log.info('Adding lab component features...') for pre_time_delta in LAB_PRE_TIME_DELTAS: log.info('\t%s' % pre_time_delta) self._factory.addLabResultFeatures(self._lab_components, False, pre_time_delta, LAB_POST_TIME_DELTA) FeatureMatrix._add_features(self, index_time_col='order_time')
def __init__(self, lab_var, num_episodes, random_state=None, isLabPanel=True, timeLimit=None, notUsePatIds=None): FeatureMatrix.__init__(self, lab_var, num_episodes) self._isLabPanel = isLabPanel if isLabPanel: self._varTypeInTable = 'proc_code' self._ylabel = 'all_components_normal' else: self._varTypeInTable = 'base_name' self._ylabel = 'component_normal' # Parse arguments. self._lab_var = lab_var self._num_requested_episodes = num_episodes self._num_reported_episodes = 0 # SQLite's random() function does not support a seed value. if random_state: self._random_state = random_state if LocalEnv.DATABASE_CONNECTOR_NAME == 'psycopg2': query = SQLQuery() query.addSelect('setseed(%d);' % random_state) DBUtil.execute(query) self._time_limit = timeLimit if notUsePatIds: self._notUsePatIds = list(notUsePatIds) else: self._notUsePatIds = [] # Query patient episodes. self._query_patient_episodes() episodes = self._factory.getPatientEpisodeIterator() patients = set() for episode in episodes: patient_id = int(episode[self._factory.patientEpisodeIdColumn]) patients.add(patient_id) self._num_patients = len(patients) # Add features. self._add_features() # Build matrix. FeatureMatrix._build_matrix(self)
def __init__(self, lab_panel, lab_features, num_episodes, random_state=None): FeatureMatrix.__init__(self, lab_panel, num_episodes) # Parse arguments. self._lab_panel = lab_panel self._lab_features = lab_features self._med_panel = [ ['Cefepime (Oral)', 'Cefepime (Intravenous)'], ['Cefazolin (Oral)', 'Cefazolin (Intravenous)'], ['Ceftriaxone (Oral)', 'Ceftriaxone (Intravenous)'], ['Meropenem (Oral)', 'Meropenem (Intravenous)'], ['Vancomycin (Oral)', 'Vancomycin (Intravenous)'], ['Linezolid (Oral)', 'Linezolid (Intravenous)'], ['Daptomycin (Oral)', 'Daptomycin (Intravenous)'], ['Levofloxacin (Oral)', 'Levofloxacin (Intravenous)'], ['Ciprofloxacin (Oral)', 'Ciprofloxacin (Intravenous)'], ['Ampicillin (Oral)', 'Ampicillin (Intravenous)'], ['Metronidazole (Oral)', 'Metronidazole (Intravenous)'], ['Caspofungin (Oral)', 'Caspofungin (Intravenous)'] ] # susceptibility_df = pd.read_csv('/Users/conorcorbin/repos/CDSS/scripts/LabCulturePrediction/Susceptibility_Feature_Names.csv') susceptibility_df = pd.read_csv( '/home/ec2-user/CDSS/scripts/LabCulturePrediction/Susceptibility_Feature_Names.csv' ) self._susceptibility_names = susceptibility_df['name'].values self._num_requested_episodes = num_episodes self._num_reported_episodes = 0 # Query patient episodes. self._query_patient_episodes() episodes = self._factory.getPatientEpisodeIterator() patients = set() for episode in episodes: patient_id = int(episode[self._factory.patientEpisodeIdColumn]) patients.add(patient_id) self._num_patients = len(patients) # Add features. self._add_features() # Build matrix. FeatureMatrix._build_matrix(self)
def _build_matrix_header(self, matrix_path): params = {} params['matrix_path'] = matrix_path params['matrix_module'] = inspect.getfile(inspect.currentframe()) params['data_overview'] = self._build_data_overview() params['field_summary'] = self._build_field_summary() params['include_lab_suffix_summary'] = True params['include_clinical_item_suffix_summary'] = True return FeatureMatrix._build_matrix_header(self, params)
def _add_features(self): # Add past susceptibility readings self._add_susc_features() # Add past antibiotic use as features self._add_med_features() # Add lab panel order features. self._factory.addClinicalItemFeatures(self._lab_panel, features="pre") # Add lab component result features, for a variety of time deltas. LAB_PRE_TIME_DELTAS = [datetime.timedelta(-14)] LAB_POST_TIME_DELTA = datetime.timedelta(0) log.info('Adding lab component features...') for pre_time_delta in LAB_PRE_TIME_DELTAS: log.info('\t%s' % pre_time_delta) self._factory.addLabResultFeatures(self._lab_components, False, pre_time_delta, LAB_POST_TIME_DELTA) FeatureMatrix._add_features(self, index_time_col='shifted_order_time')
def __init__(self, lab_panel, num_episodes): FeatureMatrix.__init__(self, lab_panel, num_episodes) # Parse arguments. self._lab_panel = lab_panel self._num_requested_episodes = num_episodes self._num_reported_episodes = 0 # Query patient episodes. self._query_patient_episodes() episodes = self._factory.getPatientEpisodeIterator() patients = set() for episode in episodes: patient_id = int(episode[self._factory.patientEpisodeIdColumn]) patients.add(patient_id) self._num_patients = len(patients) # Add features. self._add_features() # Build matrix. FeatureMatrix._build_matrix(self)
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. self._lab_components = self._get_components_in_lab_panel() random_patient_list = self._get_random_patient_list() self.usedPatIds = random_patient_list[:] # Build SQL query for list of patient episodes. # Note that for 2008-2014 data, result_flag can take on any of the # following values: High, Low, High Panic, Low Panic, # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, * # (NONE): Only 27 lab components can have this flag. None has this # value for more than 5 results, so ignore it. # *: Only 10 lab components can have this flag. Only 6 have it for # >10 tests, and each of them is a microbiology test for which # a flag is less meaningful, e.g. Gram Stain, AFB culture. # Susceptible: Only 15 lab components can have this flag. All of those # only have this value for 2 results, so ignore it. # Resistant: Only 1 lab component can have this flag. Only two results # have this value, so ignore it. # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD) # have this value for thousands of results, so include it. # Negative: Only 10 lab components can have this flag, and all for # less than 5 results, so ignore it. # Positive: Only 3 lab components can have this flag, and all for # only 1 result, so ignore it. # Low Off-Scale: Only 1 lab component can have this flag, and only for # 3 results, so ignore it. # Low Panic: 1401 lab components can have this flag, many core # metabolic components. Include it. # High Panic: 8084 lab components can have this flag, many core # metabolic components. Include it. if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE': # TODO: component query = SQLQuery() query.addSelect('CAST(pat_id AS BIGINT)') query.addSelect('sop.order_proc_id AS order_proc_id') query.addSelect('proc_code') #TODO:sx query.addSelect('order_time') query.addSelect( "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel" ) #sx query.addSelect( "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components" ) #sx query.addSelect( "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components" ) #sx query.addSelect( "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal" ) #sx query.addFrom('stride_order_proc AS sop') #sx query.addFrom('stride_order_results AS sor') #sx query.addWhere('sop.order_proc_id = sor.order_proc_id') #sx query.addWhere( "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)" ) query.addWhereIn("proc_code", [self._lab_var]) #sx query.addWhereIn("pat_id", random_patient_list) query.addGroupBy('pat_id') query.addGroupBy('sop.order_proc_id') #sx query.addGroupBy('proc_code') #sx query.addGroupBy('order_time') query.addGroupBy('abnormal_yn') #sx query.addOrderBy('pat_id') query.addOrderBy('sop.order_proc_id') #sx query.addOrderBy('proc_code') #sx query.addOrderBy('order_time') query.setLimit(self._num_requested_episodes) self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, index_time_col='order_time') elif LocalEnv.DATASET_SOURCE_NAME == 'UMich': # query = SQLQuery() # query.addSelect('CAST(pat_id AS BIGINT) AS pat_id') # query.addSelect('order_proc_id') # query.addSelect(self._varTypeInTable) # query.addSelect('order_time') # # query.addWhereIn(self._varTypeInTable, [self._lab_var]) # # if self._isLabPanel: # # query.addSelect("SUM(CASE WHEN result_in_range_yn IN ('N', 'Y') THEN 1 ELSE 0 END) AS num_components") # query.addSelect("SUM(CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END) AS num_normal_components") # query.addSelect("CAST(SUM(CASE WHEN result_in_range_yn = 'N' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal") #TODO # else: # query.addSelect("CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal") # # query.addFrom('labs') # query.addWhereIn("pat_id", random_patient_list) # # query.addGroupBy('pat_id') # query.addGroupBy('order_proc_id') # query.addGroupBy(self._varTypeInTable) # query.addGroupBy('order_time') # # query.addOrderBy('pat_id') # query.addOrderBy('order_proc_id') # query.addOrderBy(self._varTypeInTable) # query.addOrderBy('order_time') # # query.setLimit(self._num_requested_episodes) # # print query query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable if not self._isLabPanel: query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal " else: query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal " # query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal " query_str += "FROM labs " query_str += "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var) query_str += "AND pat_id IN " pat_list_str = "(" for pat_id in random_patient_list: pat_list_str += str(pat_id) + "," pat_list_str = pat_list_str[:-1] + ") " query_str += pat_list_str query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable query_str += "LIMIT %d" % self._num_requested_episodes self._num_reported_episodes = FeatureMatrix._querystr_patient_episodes( self, query_str, index_time_col='order_time')
def write_matrix(self, dest_path): log.info('Writing %s...' % dest_path) header = self._build_matrix_header(dest_path) FeatureMatrix.write_matrix(self, dest_path, header)
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. query = """SELECT pat_anon_id as pat_id, pat_enc_csn_anon_id, shifted_order_time, order_proc_anon_id, organism_name, proc_code, MAX(CASE WHEN antibiotic_name = 'Ampicillin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ampicillin_susc, MAX(CASE WHEN antibiotic_name = 'Ampicillin/Sulbactam' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ampicillin_sulbactam_susc, MAX(CASE WHEN antibiotic_name = 'Piperacillin/Tazobactam' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as piperacillin_tazobactam_susc, MAX(CASE WHEN antibiotic_name in ('Cefazolin', 'Cefazolin..') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as cefazolin_susc, MAX(CASE WHEN antibiotic_name in ('Ceftriaxone', 'Ceftriaxone.') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ceftriaxone_susc, MAX(CASE WHEN antibiotic_name = 'Cefepime' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as cefepime_susc, MAX(CASE WHEN antibiotic_name in ('Aztreonam', 'Aztreonam.') and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as aztreonam_susc, MAX(CASE WHEN antibiotic_name = 'Imipenem' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as imipenem_susc, MAX(CASE WHEN antibiotic_name = 'Meropenem' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as meropenem_susc, MAX(CASE WHEN antibiotic_name = 'Gentamicin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as gentamicin_susc, MAX(CASE WHEN antibiotic_name = 'Tobramycin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as tobramycin_susc, MAX(CASE WHEN antibiotic_name = 'Amikacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as amikacin_susc, MAX(CASE WHEN antibiotic_name = 'Ciprofloxacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as ciprofloxacin_susc, MAX(CASE WHEN antibiotic_name = 'Levofloxacin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as levofloxacin_susc, MAX(CASE WHEN antibiotic_name = 'Trimethoprim/Sulfamethoxazole.' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as trimethoprim_sulfamethoxazole_susc, MAX(CASE WHEN antibiotic_name = 'Nitrofurantoin' and suseptibility in ('Positive', 'Susceptible') then 1 else 0 end) as nitrofurantoin_susc, MAX(CASE WHEN antibiotic_name = 'Ampicillin' then 1 else 0 end) as ampicillin_tested, MAX(CASE WHEN antibiotic_name = 'Ampicillin/Sulbactam' then 1 else 0 end) as ampicillin_sulbactam_tested, MAX(CASE WHEN antibiotic_name = 'Piperacillin/Tazobactam' then 1 else 0 end) as piperacillin_tazobactam_tested, MAX(CASE WHEN antibiotic_name in ('Cefazolin', 'Cefazolin..') then 1 else 0 end) as cefazolin_tested, MAX(CASE WHEN antibiotic_name in ('Ceftriaxone', 'Ceftriaxone.') then 1 else 0 end) as ceftriaxone_tested, MAX(CASE WHEN antibiotic_name = 'Cefepime' then 1 else 0 end) as cefepime_tested, MAX(CASE WHEN antibiotic_name in ('Aztreonam', 'Aztreonam.') then 1 else 0 end) as aztreonam_tested, MAX(CASE WHEN antibiotic_name = 'Imipenem' then 1 else 0 end) as imipenem_tested, MAX(CASE WHEN antibiotic_name = 'Meropenem' then 1 else 0 end) as meropenem_tested, MAX(CASE WHEN antibiotic_name = 'Gentamicin' then 1 else 0 end) as gentamicin_tested, MAX(CASE WHEN antibiotic_name = 'Tobramycin' then 1 else 0 end) as tobramycin_tested, MAX(CASE WHEN antibiotic_name = 'Amikacin' then 1 else 0 end) as amikacin_tested, MAX(CASE WHEN antibiotic_name = 'Ciprofloxacin' then 1 else 0 end) as ciprofloxacin_tested, MAX(CASE WHEN antibiotic_name = 'Levofloxacin' then 1 else 0 end) as levofloxacin_tested, MAX(CASE WHEN antibiotic_name = 'Trimethoprim/Sulfamethoxazole.' then 1 else 0 end) as trimethoprim_sulfamethoxazole_tested, MAX(CASE WHEN antibiotic_name = 'Nitrofurantoin' then 1 else 0 end) as nitrofurantoin_tested FROM stride_culture_micro WHERE antibiotic_name in ('Ampicillin', 'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam', 'Cefazolin', 'Cefazolin..', 'Ceftriaxone', 'Ceftriaxone.', 'Cefepime', 'Aztreonam', 'Aztreonam.', 'Imipenem', 'Meropenem', 'Gentamicin', 'Tobramycin', 'Amikacin', 'Ciprofloxacin', 'Levofloxacin', 'Trimethoprim/Sulfamethoxazole.', 'Nitrofurantoin') AND organism_name in ('ESCHERICHIA COLI', 'KLEBSIELLA PNEUMONIAE', 'PSEUDOMONAS AERUGINOSA', 'PROTEUS MIRABILIS') GROUP BY pat_id, pat_enc_csn_anon_id, shifted_order_time, order_proc_anon_id, organism_name, proc_code ORDER BY pat_id, pat_enc_csn_anon_id, shifted_order_time """ self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, pat_id_col='pat_id', index_time_col='shifted_order_time')
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. self._lab_components = self._get_components_in_lab_panel() random_patient_list = self._get_random_patient_list() # Build SQL query for list of patient episodes. # Note that for 2008-2014 data, result_flag can take on any of the # following values: High, Low, High Panic, Low Panic, # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, * # (NONE): Only 27 lab components can have this flag. None has this # value for more than 5 results, so ignore it. # *: Only 10 lab components can have this flag. Only 6 have it for # >10 tests, and each of them is a microbiology test for which # a flag is less meaningful, e.g. Gram Stain, AFB culture. # Susceptible: Only 15 lab components can have this flag. All of those # only have this value for 2 results, so ignore it. # Resistant: Only 1 lab component can have this flag. Only two results # have this value, so ignore it. # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD) # have this value for thousands of results, so include it. # Negative: Only 10 lab components can have this flag, and all for # less than 5 results, so ignore it. # Positive: Only 3 lab components can have this flag, and all for # only 1 result, so ignore it. # Low Off-Scale: Only 1 lab component can have this flag, and only for # 3 results, so ignore it. # Low Panic: 1401 lab components can have this flag, many core # metabolic components. Include it. # High Panic: 8084 lab components can have this flag, many core # metabolic components. Include it. if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE': query = SQLQuery() ''' pat_id: hashed patient id ''' query.addSelect('CAST(pat_id AS BIGINT) as pat_id') ''' order_proc_id: unique identifier for an episode ''' query.addSelect('sop.order_proc_id') ''' self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried ''' query.addSelect(self._varTypeInTable) ''' order_time: The time of the order. Note that sor table does not have this info. ''' query.addSelect('order_time') ''' y-labels related columns, choose one to predict (for now, use all_components_normal to predict). ''' if self._isLabPanel: query.addSelect( "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel" ) # query.addSelect( "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components" ) # sx query.addSelect( "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components" ) # sx query.addSelect( "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal" ) # sx else: query.addSelect( "CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal" ) ''' Relevant tables. Note that sor table does not have patient_id info; need to join sop to obtain it. ''' query.addFrom('stride_order_proc AS sop') query.addFrom('stride_order_results AS sor') query.addWhere('sop.order_proc_id = sor.order_proc_id') ''' Condition: self._time_limit[0] < order_time < self._time_limit[1] ''' if self._time_limit: if self._time_limit[0]: query.addWhere("sop.order_time > '%s'" % self._time_limit[0]) if self._time_limit[1]: query.addWhere("sop.order_time < '%s'" % self._time_limit[1]) query.addWhere( "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)" ) query.addWhereIn(self._varTypeInTable, [self._lab_var]) # sx query.addWhereIn("pat_id", random_patient_list) query.addGroupBy('pat_id') query.addGroupBy('sop.order_proc_id') query.addGroupBy(self._varTypeInTable) query.addGroupBy('order_time') if not self._isLabPanel: query.addGroupBy('result_flag') query.addGroupBy('abnormal_yn') # query.addOrderBy('pat_id') query.addOrderBy('sop.order_proc_id') query.addOrderBy(self._varTypeInTable) query.addOrderBy('order_time') query.setLimit(self._num_requested_episodes) self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, index_time_col='order_time') else: ''' Sqlite3 has an interesting limit for the total number of place_holders in a query, and this limit varies across platforms/operating systems (500-99999 on mac, 999 by defaulty). To avoid this problem when querying 1000-10000 patient ids, use string queries instead of the default (convenient) routine in DBUtil. ''' query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable if not self._isLabPanel: query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal " else: query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal " query_str += "FROM labs " query_str += "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var) query_str += "AND pat_id IN " pat_list_str = "(" for pat_id in random_patient_list: pat_list_str += str(pat_id) + "," pat_list_str = pat_list_str[:-1] + ") " query_str += pat_list_str query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable query_str += "LIMIT %d" % self._num_requested_episodes self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query_str, index_time_col='order_time')
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. self._lab_components = self._get_components_in_lab_panel() random_patient_list = self._get_random_patient_list() #print("RANDOM PATIENT LIST", random_patient_list) # Build SQL query for list of patient episodes. # Note that for 2008-2014 data, result_flag can take on any of the # following values: High, Low, High Panic, Low Panic, # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, * # (NONE): Only 27 lab components can have this flag. None has this # value for more than 5 results, so ignore it. # *: Only 10 lab components can have this flag. Only 6 have it for # >10 tests, and each of them is a microbiology test for which # a flag is less meaningful, e.g. Gram Stain, AFB culture. # Susceptible: Only 15 lab components can have this flag. All of those # only have this value for 2 results, so ignore it. # Resistant: Only 1 lab component can have this flag. Only two results # have this value, so ignore it. # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD) # have this value for thousands of results, so include it. # Negative: Only 10 lab components can have this flag, and all for # less than 5 results, so ignore it. # Positive: Only 3 lab components can have this flag, and all for # only 1 result, so ignore it. # Low Off-Scale: Only 1 lab component can have this flag, and only for # 3 results, so ignore it. # Low Panic: 1401 lab components can have this flag, many core # metabolic components. Include it. # High Panic: 8084 lab components can have this flag, many core # metabolic components. Include it. query = SQLQuery() query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id') query.addSelect('CAST(shifted_order_time AS TIMESTAMP)') query.addSelect('stride_culture_micro.proc_code') query.addSelect('organism_name') #one for the result # Let us look at top 10 commonly occuring bacteria query.addSelect( "CASE WHEN organism_name IS NULL THEN 1 ELSE 0 END AS NO_BACTERIA") query.addSelect( "CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI" ) query.addSelect( "CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS" ) query.addSelect( "CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES" ) query.addSelect( "CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE" ) query.addSelect( "CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA" ) query.addSelect( "CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS" ) query.addSelect( "CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS" ) query.addSelect( "CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS" ) query.addSelect( "CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS" ) query.addFrom('stride_culture_micro') query.addWhereIn("stride_culture_micro.proc_code", [self._lab_panel]) query.addWhereIn("pat_id", random_patient_list) query.addGroupBy('pat_id') query.addGroupBy('shifted_order_time') query.addGroupBy('stride_culture_micro.proc_code') query.addGroupBy('organism_name') query.addOrderBy('pat_id') query.addOrderBy('shifted_order_time') query.addOrderBy('stride_culture_micro.proc_code') query.addOrderBy('organism_name') query.setLimit(self._num_requested_episodes) self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, pat_id_col='pat_id', index_time_col='shifted_order_time')
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. self._lab_components = self._get_components_in_lab_panel() random_patient_list = self._get_random_patient_list() #print("RANDOM PATIENT LIST", random_patient_list) # Build SQL query for list of patient episodes. # Note that for 2008-2014 data, result_flag can take on any of the # following values: High, Low, High Panic, Low Panic, # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, * # (NONE): Only 27 lab components can have this flag. None has this # value for more than 5 results, so ignore it. # *: Only 10 lab components can have this flag. Only 6 have it for # >10 tests, and each of them is a microbiology test for which # a flag is less meaningful, e.g. Gram Stain, AFB culture. # Susceptible: Only 15 lab components can have this flag. All of those # only have this value for 2 results, so ignore it. # Resistant: Only 1 lab component can have this flag. Only two results # have this value, so ignore it. # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD) # have this value for thousands of results, so include it. # Negative: Only 10 lab components can have this flag, and all for # less than 5 results, so ignore it. # Positive: Only 3 lab components can have this flag, and all for # only 1 result, so ignore it. # Low Off-Scale: Only 1 lab component can have this flag, and only for # 3 results, so ignore it. # Low Panic: 1401 lab components can have this flag, many core # metabolic components. Include it. # High Panic: 8084 lab components can have this flag, many core # metabolic components. Include it. query = SQLQuery() query.addSelect('CAST(pat_anon_id AS BIGINT) AS pat_id') query.addSelect('CAST(shifted_order_time AS TIMESTAMP)') query.addSelect('stride_culture_micro.proc_code') query.addSelect('organism_name') #one for the result # Experimenting susceptibility_flags = [ 'Trimethoprim/Sulfamethoxazole', 'Vancomycin', 'Penicillin', 'Levofloxacin', 'Clindamycin', 'Ceftriaxone', 'Erythromycin', 'Ampicillin', 'Meropenem', 'Ciprofloxacin', 'Cefepime', 'Aztreonam.', 'Ampicillin/Sulbactam', 'Piperacillin/Tazobactam', 'Linezolid', 'Oxacillin.', 'Cefazolin', 'Daptomycin' ] for med in susceptibility_flags: med_col = med.replace('/', '_').replace('.', '') query.addSelect( "MAX(CASE WHEN antibiotic_name = '%s' AND (suseptibility = 'Susceptible' OR suseptibility = 'Positive') THEN 1 ELSE 0 END) AS %s_Susc" % (med, med_col)) query.addSelect( "MAX(CASE WHEN antibiotic_name = '%s' THEN 1 ELSE 0 END) as %s_tested" % (med, med_col)) # Let us look at top 10 commonly occuring bacteria query.addSelect( "CASE WHEN organism_name IS NULL THEN 0 ELSE 1 END AS BACTERIA_PRESENT" ) query.addSelect( "CASE WHEN organism_name = 'ESCHERICHIA COLI' THEN 1 ELSE 0 END AS ESCHERICHIA_COLI" ) query.addSelect( "CASE WHEN organism_name = 'STAPHYLOCOCCUS AUREUS' THEN 1 ELSE 0 END AS STAPHYLOCOCCUS_AUREUS" ) query.addSelect( "CASE WHEN organism_name = 'ENTEROCOCCUS SPECIES' THEN 1 ELSE 0 END AS ENTEROCOCCUS_SPECIES" ) query.addSelect( "CASE WHEN organism_name = 'KLEBSIELLA PNEUMONIAE' THEN 1 ELSE 0 END AS KLEBSIELLA_PNEUMONIAE" ) query.addSelect( "CASE WHEN organism_name = 'PSEUDOMONAS AERUGINOSA' THEN 1 ELSE 0 END AS PSEUDOMONAS_AERUGINOSA" ) query.addSelect( "CASE WHEN organism_name = 'COAG NEGATIVE STAPHYLOCOCCUS' THEN 1 ELSE 0 END AS COAG_NEGATIVE_STAPHYLOCOCCUS" ) query.addSelect( "CASE WHEN organism_name = 'ENTEROCOCCUS FAECALIS' THEN 1 ELSE 0 END AS ENTEROCOCCUS_FAECALIS" ) query.addSelect( "CASE WHEN organism_name = 'PROTEUS MIRABILIS' THEN 1 ELSE 0 END AS PROTEUS_MIRABILIS" ) query.addSelect( "CASE WHEN organism_name = 'CANDIDA ALBICANS' THEN 1 ELSE 0 END AS CANDIDA_ALBICANS" ) query.addFrom('stride_culture_micro') query.addWhereIn("stride_culture_micro.proc_code", self._lab_panel) query.addWhereIn("pat_anon_id", random_patient_list) query.addGroupBy('pat_anon_id') query.addGroupBy('shifted_order_time') query.addGroupBy('stride_culture_micro.proc_code') query.addGroupBy('organism_name') query.addOrderBy('pat_anon_id') query.addOrderBy('shifted_order_time') query.addOrderBy('stride_culture_micro.proc_code') query.addOrderBy('organism_name') query.setLimit(self._num_requested_episodes) self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, pat_id_col='pat_id', index_time_col='shifted_order_time')
def _query_patient_episodes(self, pat_id_col="pat_deid"): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. self._lab_components = self._get_components_in_lab_panel() random_patient_list = self._get_random_patient_list() print("RANDOM PATIENT LIST", random_patient_list) # Build SQL query for list of patient episodes. # Note that for 2008-2014 data, result_flag can take on any of the # following values: High, Low, High Panic, Low Panic, # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, * # (NONE): Only 27 lab components can have this flag. None has this # value for more than 5 results, so ignore it. # *: Only 10 lab components can have this flag. Only 6 have it for # >10 tests, and each of them is a microbiology test for which # a flag is less meaningful, e.g. Gram Stain, AFB culture. # Susceptible: Only 15 lab components can have this flag. All of those # only have this value for 2 results, so ignore it. # Resistant: Only 1 lab component can have this flag. Only two results # have this value, so ignore it. # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD) # have this value for thousands of results, so include it. # Negative: Only 10 lab components can have this flag, and all for # less than 5 results, so ignore it. # Positive: Only 3 lab components can have this flag, and all for # only 1 result, so ignore it. # Low Off-Scale: Only 1 lab component can have this flag, and only for # 3 results, so ignore it. # Low Panic: 1401 lab components can have this flag, many core # metabolic components. Include it. # High Panic: 8084 lab components can have this flag, many core # metabolic components. Include it. query = SQLQuery() #query.addSelect('CAST(micro.pat_deid AS BIGINT)') query.addSelect('CAST(micro.order_time AS TIMESTAMP)') #query.addSelect('sop.order_proc_id AS order_proc_id') query.addSelect('proc_code') query.addSelect('order_time') query.addSelect('organism_name') #one for the result # query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel") # query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components") # query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components") # query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal") # query.addFrom('stride_order_proc AS sop') # query.addFrom('stride_order_results AS sor') query.addFrom('micro') #query.addWhere('sop.order_proc_id = sor.order_proc_id') #query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)") #query.addWhereIn("micro.proc_code", [self._lab_panel]) query.addWhereIn("micro.pat_deid", random_patient_list) query.addGroupBy('micro.pat_deid') #query.addGroupBy('sop.order_proc_id') query.addGroupBy('proc_code') query.addGroupBy('order_time') query.addGroupBy('organism_name') #query.addGroupBy('abnormal_yn') query.addOrderBy('micro.pat_deid') #query.addOrderBy('sop.order_proc_id') query.addOrderBy('proc_code') query.addOrderBy('micro.order_time') query.setLimit(self._num_requested_episodes) self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
def _query_patient_episodes(self): log.info('Querying patient episodes...') # Initialize DB cursor. cursor = self._connection.cursor() # Build parameters for query. query = """SELECT DISTINCT ON(labels.pat_id, labels.pat_enc_csn_id) labels.pat_id, labels.pat_enc_csn_id, labels.order_time, labels.MRSA_PRESENT FROM (SELECT vanco_zosyn.pat_id, vanco_zosyn.pat_enc_csn_id, vanco_zosyn.order_time, MAX(CASE WHEN micro.organism_name = 'STAPHYLOCOCCUS AUREUS' AND (micro.antibiotic_name = 'Oxacillin' OR micro.antibiotic_name = 'Oxacillin.') AND micro.suseptibility = 'Resistant' THEN 1 ELSE 0 END) as MRSA_PRESENT FROM (SELECT pip_tazo.pat_id, pip_tazo.pat_enc_csn_id, pip_start_time AS order_time FROM (SELECT pat_id, pat_enc_csn_id, start_taking_time AS pip_start_time, end_taking_time AS pip_end_time, description AS pip_description FROM stride_order_med WHERE description LIKE '%PIPERACILLIN-TAZOBACTAM%') pip_tazo INNER JOIN (SELECT pat_id, pat_enc_csn_id, start_taking_time AS vanco_start_time, end_taking_time AS vanco_end_time, description AS vanco_desciption FROM stride_order_med WHERE description LIKE '%VANCOMYCIN%') vanco ON pip_tazo.pat_enc_csn_id = vanco.pat_enc_csn_id AND pip_start_time = vanco_start_time) vanco_zosyn INNER JOIN (SELECT pat_anon_id, pat_enc_csn_anon_id, proc_code, description, organism_name, antibiotic_name, suseptibility, shifted_order_time FROM stride_culture_micro) micro ON vanco_zosyn.pat_id = micro.pat_anon_id AND vanco_zosyn.pat_enc_csn_id = micro.pat_enc_csn_anon_id WHERE DATE_PART('day', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) * 24 + DATE_PART('hour', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) >= 0 AND DATE_PART('day', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) * 24 + DATE_PART('hour', vanco_zosyn.order_time - CAST(micro.shifted_order_time AS timestamp)) <= 24 GROUP BY pat_id, pat_enc_csn_id, order_time) as labels ORDER BY pat_id, pat_enc_csn_id, order_time """ self._num_reported_episodes = FeatureMatrix._query_patient_episodes( self, query, pat_id_col='pat_id', index_time_col='order_time')