def queryOutpatientIronRx(outputFile, patientById):
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient
    poIronIngredientMedicationIds = \
        (3065, 3066, 3067, 3071, 3074, 3077, 3986, 7292, 11050, 25006, 26797,
         34528, 39676, 78552, 79674, 83568, 84170, 85151, 96118, 112120, 112395,
         113213, 126035, 198511, 200455, 201994, 201995, 203679, 207059, 207404,
         208037, 208072)
    # Medication IDs directly from prescriptions, formulations that did not map through RxNorm
    poIronDirectMedicationIds = \
        (111354, 540526, 205010, 121171, 111320, 82791, 93962, 201795, 206722,
         201068, 116045, 208725, 111341, 206637, 112400, 210256, 77529, 20844,
         83798, 205523, 112428, 125474, 111343)
    allEnteralIronMedicationIds = set(poIronIngredientMedicationIds).union(poIronDirectMedicationIds)

    formatter = TextResultsFormatter(outputFile)

    colNames = ["pat_id", "ordering_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_med")
    query.addWhereIn("medication_id", allEnteralIronMedicationIds)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("ordering_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
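# Hypothetical usage sketch for the helper above (not from the original source): the
# patientById mapping and the output path are made-up stand-ins, and .viewkeys()
# implies a Python 2 runtime with the database environment already configured.
patientById = {-100: {"pat_id": -100}, -200: {"pat_id": -200}}
with open("outpatientIronRx.tab", "w") as outputFile:
    queryOutpatientIronRx(outputFile, patientById)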
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    pauseSeconds = float(options.pauseSeconds)

    query = SQLQuery()
    query.addSelect("count(order_med_id_coded) as nOrders")
    query.addSelect("om.med_route, om.medication_id, om.med_description")
    query.addFrom("starr_datalake2018.order_med as om")
    if options.descriptionPrefix:
        query.addWhereOp("om.med_description", "like", options.descriptionPrefix + "%%")  # Add wildcard to enable prefix search
    if options.medRoutes:
        query.addWhereIn("om.med_route", options.medRoutes.split(","))
    query.addGroupBy("om.medication_id, om.med_description, om.med_route")
    query.addOrderBy("nOrders desc, om.med_description")

    formatter = TextResultsFormatter(outputFile)

    prog = ProgressDots()
    for row in DBUtil.execute(query, includeColumnNames=True, connFactory=self.connFactory):
        formatter.formatTuple(row)
        time.sleep(pauseSeconds)
        prog.update()
    prog.printStatus()
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    pauseSeconds = float(options.pauseSeconds)

    query = SQLQuery()
    query.addSelect("cic.description, ci.clinical_item_id, ci.name, ci.description")
    query.addFrom("clinical_item_category as cic")
    query.addFrom("clinical_item as ci")
    query.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id")
    if options.itemPrefix:
        query.addWhereOp("ci.description", "like", options.itemPrefix + "%%")  # Add wildcard to enable prefix search
    if options.categoryNames:
        query.addWhereIn("cic.description", options.categoryNames.split(","))
    query.addOrderBy("cic.description, ci.name, ci.description, ci.clinical_item_id")

    formatter = TextResultsFormatter(outputFile)

    prog = ProgressDots()
    for row in DBUtil.execute(query, includeColumnNames=True, connFactory=self.connFactory):
        formatter.formatTuple(row)
        time.sleep(pauseSeconds)
        prog.update()
    prog.printStatus()
def _get_random_patient_list(self):
    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Get average number of results for this lab test per patient.
    avg_orders_per_patient = self._get_average_orders_per_patient()
    log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need to get for a feature matrix of requested size.
    self._num_patients = int(numpy.max([self._num_requested_episodes / \
        avg_orders_per_patient, 1]))

    # Get numPatientsToQuery random patients who have gotten test.
    # TODO(sbala): Have option to feed in a seed for the randomness.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addFrom('stride_order_proc AS sop')
    query.addWhereIn('proc_code', [self._lab_panel])
    query.addOrderBy('RANDOM()')
    query.setLimit(self._num_patients)

    log.debug('Querying random patient list...')
    results = DBUtil.execute(query)

    # Get patient list.
    random_patient_list = [row[0] for row in results]

    return random_patient_list
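# Worked example of the patient-count arithmetic above (numbers are hypothetical):
# requesting 1000 episodes when patients average 2.5 qualifying orders each means
# roughly 1000 / 2.5 = 400 patients need to be sampled (never fewer than 1).
import numpy
num_requested_episodes, avg_orders_per_patient = 1000, 2.5
num_patients = int(numpy.max([num_requested_episodes / avg_orders_per_patient, 1]))
assert num_patients == 400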
def queryLabResults(outputFile, patientById):
    log.info("Query out lab results, takes a while")

    labBaseNames = \
        ('ferritin', 'fe', 'trfrn', 'trfsat', 'ystfrr',
         'wbc', 'hgb', 'hct', 'mcv', 'rdw', 'plt',
         'retic', 'reticab', 'ldh', 'hapto', 'tbil', 'ibil', 'dbil',
         'cr', 'esr', 'crp')

    formatter = TextResultsFormatter(outputFile)

    # Query is rapid when filtering by lab result type and limiting to X records.
    # Filtering by patient ID drags performance down substantially unless the table is
    # preloaded (e.g., by doing a count on the SOR table first)?
    colNames = ["pat_id", "base_name", "common_name", "ord_num_value", "reference_unit", "result_flag", "sor.result_time"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_results as sor, stride_order_proc as sop")
    query.addWhere("sor.order_proc_id = sop.order_proc_id")
    query.addWhereIn("base_name", labBaseNames)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("sor.result_time")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
def test_copyPatientTemplate(self):
    # Copy a patient template, including deep copy of notes, orders, states, but only up to relative time zero
    newPatientData = {"name": "Template Copy"}
    templatePatientId = -1
    self.testPatientId = self.manager.copyPatientTemplate(newPatientData, templatePatientId)
    futureTime = 1000000  # Far future time to test that we still only copied the results up to time zero

    # Verify basic patient information
    patientCols = ["name", "age_years", "gender", "sim_state_id"]
    patientModel = self.manager.loadPatientInfo([self.testPatientId])[0]
    expectedPatientModel = RowItemModel(["Template Copy", 60, "Female", -1], patientCols)
    self.assertEqualDict(expectedPatientModel, patientModel, patientCols)

    # Verify notes
    dataCols = ["sim_patient_id", "content"]
    sampleData = self.manager.loadNotes(self.testPatientId, futureTime)
    verifyData = \
        [
            RowItemModel([self.testPatientId, "Initial Note"], dataCols),
            RowItemModel([self.testPatientId, "Initial Note"], dataCols),  # Second copy because another state initiation at time zero and negative onset time
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)

    # Verify orders
    dataCols = ["sim_user_id", "sim_patient_id", "sim_state_id", "clinical_item_id", "relative_time_start", "relative_time_end"]
    sampleData = self.manager.loadPatientOrders(self.testPatientId, futureTime, loadActive=None)
    verifyData = \
        [
            RowItemModel([-1, self.testPatientId, -1, -15, 0, None], dataCols),
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)

    # Verify states
    dataCols = ["sim_patient_id", "sim_state_id", "relative_time_start", "relative_time_end"]
    query = SQLQuery()
    for dataCol in dataCols:
        query.addSelect(dataCol)
    query.addFrom("sim_patient_state")
    query.addWhereEqual("sim_patient_id", self.testPatientId)
    query.addOrderBy("relative_time_start")
    sampleDataTable = DBUtil.execute(query, includeColumnNames=True)
    sampleData = modelListFromTable(sampleDataTable)
    verifyData = \
        [
            RowItemModel([self.testPatientId, -1, -7200, 0], dataCols),
            RowItemModel([self.testPatientId, -1, 0, None], dataCols),
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)
def test_buildFeatureMatrix_prePostFeatures(self):
    """
    Test features parameter in addClinicalItemFeatures which allows client
    to specify they only want .pre* or .post* columns in feature matrix.
    """
    # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
    # has not been set.
    with self.assertRaises(ValueError):
        self.factory.processPatientEpisodeInput()

    # Initialize DB cursor.
    cursor = self.connection.cursor()

    # Build SQL query for list of patient episodes.
    patientEpisodeQuery = SQLQuery()
    patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
    patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
    patientEpisodeQuery.addSelect("proc_code")
    patientEpisodeQuery.addSelect("order_time")
    patientEpisodeQuery.addSelect("COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results")
    patientEpisodeQuery.addFrom("stride_order_proc AS sop")
    patientEpisodeQuery.addFrom("stride_order_results AS sor")
    patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
    patientEpisodeQuery.addGroupBy("pat_id, sop.order_proc_id, proc_code, order_time")
    patientEpisodeQuery.addOrderBy("pat_id, sop.order_proc_id, proc_code, order_time")
    cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    # Set and process patientEpisodeInput.
    self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()
    resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
    resultPatientEpisodes = list()
    for episode in resultEpisodeIterator:
        episode["pat_id"] = int(episode["pat_id"])
        episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
        resultPatientEpisodes.append(episode)

    # Add TestItem100 and TestItem200 clinical item data.
    self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
    self.factory.addClinicalItemFeatures(["TestItem200"], features="post")

    self.factory.buildFeatureMatrix()
    resultMatrix = self.factory.readFeatureMatrixFile()
    expectedMatrix = FM_TEST_OUTPUT["test_buildFeatureMatrix_prePostFeatures"]

    self.assertEqualList(resultMatrix[2:], expectedMatrix)
def queryPatientClinicalItemData(self, analysisQuery, conn):
    """Query for all of the order / item data for each patient noted in
    the analysisQuery and yield them one list of clinicalItemIds at a time.

    Generated iterator over 2-ples (patientId, clinicalItemIdList)
    - Patient ID: ID of the patient the currently yielded items are intended for
    - Clinical Item ID List: List of all of the clinical items / orders for this patient,
        ordered by item date (currently excluding those that are off the "default_recommend" /
        on the "default exclusion" list).
    """
    sqlQuery = SQLQuery();
    sqlQuery.addSelect("pi.patient_id");
    sqlQuery.addSelect("pi.clinical_item_id");
    #sqlQuery.addSelect("pi.item_date");
    sqlQuery.addFrom("clinical_item_category as cic");
    sqlQuery.addFrom("clinical_item as ci");
    sqlQuery.addFrom("patient_item as pi");
    sqlQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id");
    sqlQuery.addWhere("ci.clinical_item_id = pi.clinical_item_id");
    sqlQuery.addWhereIn("pi.patient_id", analysisQuery.patientIds);
    sqlQuery.addOrderBy("pi.patient_id");
    sqlQuery.addOrderBy("pi.item_date");

    # Execute the actual query for patient order / item data
    cursor = conn.cursor();
    cursor.execute(str(sqlQuery), tuple(sqlQuery.params));

    currentPatientId = None;
    clinicalItemIdList = list();
    row = cursor.fetchone();
    while row is not None:
        (patientId, clinicalItemId) = row;
        if currentPatientId is None:
            currentPatientId = patientId;

        if patientId != currentPatientId:
            # Changed patient, yield the existing data for the previous patient
            yield (currentPatientId, clinicalItemIdList);

            # Update our data tracking for the current patient
            currentPatientId = patientId;
            clinicalItemIdList = list();

        clinicalItemIdList.append(clinicalItemId);

        row = cursor.fetchone();

    # Yield / return the last patient data
    yield (currentPatientId, clinicalItemIdList);

    cursor.close();
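# Hypothetical consumption sketch (not from the original source): the method above is a
# generator, so item ID lists stream back one patient at a time. The "recommender",
# "analysisQuery", and "conn" objects here are assumed stand-ins supplied by the caller.
for patientId, clinicalItemIdList in recommender.queryPatientClinicalItemData(analysisQuery, conn):
    print("Patient %s has %d ordered items" % (patientId, len(clinicalItemIdList)))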
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    colNames = ["patient_id", "item_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", clinicalItemIds)
    query.addWhereIn("patient_id", patientById.viewkeys())
    query.addOrderBy("patient_id")
    query.addOrderBy("item_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
def _getNonNullLabs(self):
    query = SQLQuery()
    # SELECT
    query.addSelect('proc_code')
    # FROM
    query.addFrom('stride_order_proc')
    # WHERE
    query.addWhereLike('proc_code', 'LAB%')
    query.addWhere('abnormal_yn is not null')
    query.addGroupBy('proc_code')
    query.addOrderBy('proc_code')

    results = DBUtil.execute(query)

    df = pd.DataFrame(results, columns=query.select).to_csv(DATA_FOLDER + 'proc_codes.csv', index=False)
def test_addTimeCycleFeatures(self):
    """
    Test .addTimeCycleFeatures()
    """
    # Initialize DB cursor.
    cursor = self.connection.cursor()

    # Build SQL query for list of patient episodes.
    patientEpisodeQuery = SQLQuery()
    patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
    patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
    patientEpisodeQuery.addSelect("proc_code")
    patientEpisodeQuery.addSelect("order_time")
    patientEpisodeQuery.addSelect("COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results")
    patientEpisodeQuery.addFrom("stride_order_proc AS sop")
    patientEpisodeQuery.addFrom("stride_order_results AS sor")
    patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
    patientEpisodeQuery.addGroupBy("pat_id, sop.order_proc_id, proc_code, order_time")
    patientEpisodeQuery.addOrderBy("pat_id, sop.order_proc_id, proc_code, order_time")
    cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    # Set and process patientEpisodeInput.
    self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()

    # Add time cycle features.
    self.factory.addTimeCycleFeatures("order_time", "month")
    self.factory.addTimeCycleFeatures("order_time", "hour")

    # Verify output.
    self.factory.buildFeatureMatrix()
    resultMatrix = self.factory.readFeatureMatrixFile()
    expectedMatrix = FM_TEST_OUTPUT["test_addTimeCycleFeatures"]["expectedMatrix"]
    self.assertEqualTable(expectedMatrix, resultMatrix[2:], precision=5)

    # Clean up feature matrix.
    try:
        os.remove(self.factory.getMatrixFileName())
    except OSError:
        pass
def _getClinicalItemCounts(self):
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('COUNT(' + CLINICAL_ITEM_ID + ') as total')
    # FROM
    query.addFrom('patient_item')
    # OTHER
    query.addGroupBy(CLINICAL_ITEM_ID)
    query.addOrderBy('total', dir='desc')

    print(query)
    print(query.getParams())
    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    results = DBUtil.execute(str(query), query.getParams())

    pd.DataFrame(results, columns=[CLINICAL_ITEM_ID, 'count']).to_csv('data_summary_stats/item_counts.csv', index=False)
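# For orientation only: given the add* calls above, and assuming CLINICAL_ITEM_ID holds the
# column name string 'clinical_item_id', the builder is expected to render SQL roughly like
# the following (an assumption about SQLQuery's output, not copied from DBUtil):
#
#   SELECT clinical_item_id, COUNT(clinical_item_id) as total
#   FROM patient_item
#   GROUP BY clinical_item_id
#   ORDER BY total desc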
def _getComponentItemIds(self):
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('name')
    # FROM
    query.addFrom('clinical_item')
    # WHERE
    query.addWhere('clinical_item_category_id = 58')
    query.addOrderBy('name')

    results = DBUtil.execute(query)

    df = pd.DataFrame(results, columns=query.select)
    df['base_name'] = df['name'].str.replace(r'\([a-z]*\)', '', case=False).str.strip()
    df.to_csv(DATA_FOLDER + 'result_ids.csv', index=False)
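# Illustration of the base_name cleanup above (values are hypothetical, and this assumes
# the older pandas behavior the original call relies on, where str.replace treats the
# pattern as a regex): with case=False a parenthesized, letters-only suffix is stripped
# regardless of case, e.g. 'HGB(CALC)' -> 'HGB' and 'Sodium(ser)' -> 'Sodium'.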
def _getLabs(self):
    query = SQLQuery()
    # SELECT
    query.addSelect(CLINICAL_ITEM_ID)
    query.addSelect('name')
    query.addSelect('description')
    # FROM
    query.addFrom('clinical_item')
    # WHERE
    query.addWhereLike('name', 'LAB%')
    # OTHER
    query.addOrderBy(CLINICAL_ITEM_ID, dir='asc')

    print(query)
    print(query.getParams())
    DBUtil.runDBScript(self.SCRIPT_FILE, False)
    results = DBUtil.execute(str(query), query.getParams())

    pd.DataFrame(results, columns=query.select).to_csv('data_summary_stats/labs.csv', index=False)
def _getPatientsComponentsHistories(self, item_ids):
    query = SQLQuery()
    # SELECT
    query.addSelect('patient_id')
    query.addSelect('name')
    query.addSelect('item_date')
    # FROM
    query.addFrom('clinical_item as ci')
    query.addFrom('patient_item as pi')
    # WHERE
    query.addWhere('ci.clinical_item_id = pi.clinical_item_id')
    query.addWhereIn('ci.clinical_item_id', item_ids)
    query.addOrderBy('patient_id')
    query.addOrderBy('item_date')

    # print query
    # print query.getParams()
    return customDBUtil.execute(query)
def loadNotes(self, patientId, currentTime, conn=None):
    """Load notes committed up to the given simulation time.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("sn.sim_note_id")
        query.addSelect("sps.sim_patient_id")  # Link
        query.addSelect("sn.sim_state_id")
        query.addSelect("sn.note_type_id")
        query.addSelect("sn.author_type_id")
        query.addSelect("sn.service_type_id")
        query.addSelect("(sps.relative_time_start + sn.relative_state_time) as relative_time")
        query.addSelect("sn.content")
        query.addFrom("sim_note as sn")
        query.addFrom("sim_patient_state as sps")
        query.addWhere("sn.sim_state_id = sps.sim_state_id")
        query.addWhereEqual("sps.sim_patient_id", patientId)
        # Only unlock notes once the expected time has been traversed
        query.addWhereOp("(sps.relative_time_start + sn.relative_state_time)", "<=", currentTime)
        query.addOrderBy("(sps.relative_time_start + sn.relative_state_time)")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
def clinicalItemSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("cic.source_table")
        query.addSelect("cic.description as category_description")
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere("ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            searchWords = itemQuery.searchStr.split()
            #query.openWhereOrClause()
            for searchField in ("ci.description",):
                for searchWord in searchWords:
                    # Prefix search by regular expression
                    query.addWhereOp(searchField, "~*", "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})
            #query.closeWhereOrClause()
        if itemQuery.sourceTables:
            query.addWhereIn("cic.source_table", itemQuery.sourceTables)
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
        # Also ignore items with no occurrence in the analyzed data
        # (occurs if an item was accepted for analysis from the multi-year dataset,
        #  but never used in a sub-time frame's analysis)
        query.addWhere("ci.item_count <> 0")

        if itemQuery.sortField:
            query.addOrderBy(itemQuery.sortField)
        query.addOrderBy("cic.description")
        query.addOrderBy("ci.name")
        query.addOrderBy("ci.description")

        if itemQuery.resultCount is not None:
            query.limit = itemQuery.resultCount

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
def action_default(self):
    # Convert query category ID(s) into a list, even of size 1
    categoryIds = self.requestData["clinical_item_category_id"].split(",")

    query = SQLQuery()
    query.addSelect("ci.clinical_item_id")
    query.addSelect("ci.name")
    query.addSelect("ci.description")
    query.addSelect("ci.item_count")
    query.addFrom("clinical_item as ci")
    query.addWhere("analysis_status = 1")  # Ignore specified items
    query.addWhereIn("ci.clinical_item_category_id", categoryIds)
    query.addOrderBy(self.requestData["orderBy"])

    resultTable = DBUtil.execute(query, includeColumnNames=True)
    resultModels = modelListFromTable(resultTable)

    optionValues = []
    optionTexts = []
    displayFields = ("name", "description", "item_count")
    for resultModel in resultModels:
        optionValues.append(str(resultModel["clinical_item_id"]))
        orderField = self.requestData["orderBy"].split()[0]
        orderValue = resultModel[orderField]
        textValueList = [str(orderValue)]
        for field in displayFields:
            if field != orderField:
                textValueList.append(str(resultModel[field]))
        textValue = str.join(" - ", textValueList)
        optionTexts.append(textValue)

    # Conveniently, Python string representation coincides with JavaScript
    self.requestData["optionValuesJSON"] = str(optionValues)
    self.requestData["optionTextsJSON"] = str(optionTexts)
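# Small check of the "coincides with JavaScript" note above (values are hypothetical):
# Python's list repr uses single quotes, which reads as a JavaScript array literal
# even though it is not strict JSON.
optionValues = ["101", "102"]
assert str(optionValues) == "['101', '102']"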
def _getNumPatientEncountersByMonth(self):
    # Build query.
    # SELECT
    #   CAST(EXTRACT(YEAR FROM noted_date) AS INT) AS admit_year,
    #   CAST(EXTRACT(MONTH FROM noted_date) AS INT) AS admit_month,
    #   COUNT(DISTINCT pat_enc_csn_id) AS num_encounters
    # FROM
    #   stride_dx_list
    # WHERE
    #   data_source = 'ADMIT_DX'
    # GROUP BY
    #   admit_year,
    #   admit_month
    # ORDER BY
    #   admit_year,
    #   admit_month
    query = SQLQuery()
    query.addSelect("CAST(EXTRACT(YEAR FROM noted_date) AS INT) AS admit_year")
    query.addSelect("CAST(EXTRACT(MONTH FROM noted_date) AS INT) AS admit_month")
    query.addSelect("COUNT(DISTINCT pat_enc_csn_id) AS num_encounters")
    query.addFrom("stride_dx_list")
    query.addWhereEqual("data_source", 'ADMIT_DX')
    query.addGroupBy("admit_year")
    query.addGroupBy("admit_month")
    query.addOrderBy("admit_year")
    query.addOrderBy("admit_month")

    # Execute query and return results.
    results = DBUtil.execute(query)
    encountersPerMonth = list()
    for row in results:
        admitYear = row[0]
        admitMonth = row[1]
        numEncounters = row[2]
        encountersPerMonth.append((admitYear, admitMonth, numEncounters))

    return encountersPerMonth
def loadPatientOrders(self, patientId, currentTime, loadActive=True, conn=None):
    """Load orders for the given patient that exist by the specified current time point.
    loadActive - Specify whether to load active vs. inactive/completed orders.
        Set to None to load both.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("po.sim_patient_order_id")
        query.addSelect("po.sim_user_id")
        query.addSelect("po.sim_patient_id")
        query.addSelect("po.sim_state_id")
        query.addSelect("po.clinical_item_id")
        query.addSelect("po.relative_time_start")
        query.addSelect("po.relative_time_end")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("cic.source_table")
        query.addSelect("cic.description as category_description")
        query.addFrom("sim_patient_order as po")
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere("po.clinical_item_id = ci.clinical_item_id")
        query.addWhere("ci.clinical_item_category_id = cic.clinical_item_category_id")
        query.addWhereEqual("sim_patient_id", patientId)
        query.addWhereOp("relative_time_start", "<=", currentTime)

        if loadActive:  # Filter out inactive orders here.
            query.openWhereOrClause()
            query.addWhereOp("relative_time_end", ">", currentTime)
            query.addWhere("relative_time_end is null")
            query.closeWhereOrClause()
        #elif loadActive is not None:    # Filter out active orders here.
        #    query.addWhereOp("relative_time_end","<=", currentTime);

        if loadActive:  # Organize currently active orders by category
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.description")
        else:  # Otherwise chronologic order
            query.addOrderBy("relative_time_start")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
    """Load all patient orders at the given relativeTime that are due to yield results,
    but have not yet. Include an estimate of time until results available.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        # Distinct so don't report multiple times for panel orders
        query.addSelect("distinct po.clinical_item_id")
        query.addSelect("po.relative_time_start")
        query.addSelect("po.relative_time_end")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        # Could have different turnaround times for a single order if different sub-results. Just report each.
        query.addSelect("sorm.turnaround_time")
        # Calculate time until expected result
        query.addSelect("sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result" % relativeTime)
        query.addFrom("sim_patient_order as po")
        query.addFrom("clinical_item as ci")
        query.addFrom("sim_order_result_map as sorm")
        query.addWhere("po.clinical_item_id = ci.clinical_item_id")
        query.addWhere("po.clinical_item_id = sorm.clinical_item_id")
        query.addWhereEqual("sim_patient_id", patientId)

        # Only catch orders up to the given relativeTime and not cancelled
        query.addWhereOp("relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("relative_time_end is null")
        query.addWhereOp("relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        # Only PENDING orders, so don't report orders whose results should already be available
        query.addWhereOp("sorm.turnaround_time + po.relative_time_start", ">", relativeTime)

        query.addOrderBy("time_until_result")
        query.addOrderBy("relative_time_start")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
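# Worked example for the time_until_result expression above (values are hypothetical):
# an order placed at relative_time_start = 60 with turnaround_time = 3600, inspected at
# relativeTime = 600, is still pending and expected back in 3600 - (600 - 60) = 3060.
relative_time_start, turnaround_time, relativeTime = 60, 3600, 600
time_until_result = turnaround_time - (relativeTime - relative_time_start)
assert time_until_result == 3060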
def _getAdmitDxPatientFrequencyRankByYear(self):
    # Get list of all clinical item IDs matching admit diagnosis.
    # Get this list in advance to make subsequent query run a bit faster.
    admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

    # Build query for # of unique patients.
    # SELECT
    #   ci.name AS icd_code,
    #   ci.description AS admit_dx,
    #   EXTRACT(YEAR FROM pi.item_date) AS admit_year,
    #   COUNT(DISTINCT pi.patient_id) AS num_unique_patients
    # FROM
    #   patient_item AS pi
    # JOIN
    #   clinical_item AS ci
    # ON
    #   pi.clinical_item_id = ci.clinical_item_id
    # WHERE
    #   ci.clinical_item_id IN (admitDxClinicalItemIds)
    # GROUP BY
    #   icd_code,
    #   admit_dx,
    #   admit_year,
    #   num_unique_patients
    # ORDER BY
    #   icd_code,
    #   admit_year,
    #   num_unique_patients DESC
    query = SQLQuery()
    query.addSelect("ci.name AS icd_code")
    query.addSelect("ci.description AS admit_dx")
    query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year")
    query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients")
    query.addFrom("patient_item AS pi")
    query.addJoin("clinical_item AS ci", "pi.clinical_item_id = ci.clinical_item_id")
    query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds)
    query.addGroupBy("icd_code")
    query.addGroupBy("admit_dx")
    query.addGroupBy("admit_year")
    query.addGroupBy("num_unique_patients")
    query.addOrderBy("icd_code")
    query.addOrderBy("admit_year")
    query.addOrderBy("num_unique_patients DESC")

    # Execute query.
    results = DBUtil.execute(query)
def _getPatientsLabsHistories(self, proc_codes):
    query = SQLQuery()
    # SELECT
    query.addSelect('pat_id')
    query.addSelect('abnormal_yn')
    query.addSelect('result_time')
    query.addSelect('proc_code')
    # FROM
    query.addFrom('stride_order_proc')
    # query.addFrom('patient_item as pi')
    # WHERE
    query.addWhereEqual('lab_status', 'Final result')
    # query.addWhereEqual('proc_code', proc_code)
    query.addWhereIn('proc_code', proc_codes)
    query.addOrderBy('proc_code')
    query.addOrderBy('pat_id')
    query.addOrderBy('result_time')

    return customDBUtil.execute(query)
def test_draw(self):
    # Query events by clinical_item_category.
    # SELECT
    #   pi.patient_id AS sequence_id,
    #   pi.item_date AS event_time,
    #   cic.description AS event_id
    # FROM
    #   patient_item AS pi
    # JOIN
    #   clinical_item AS ci
    # ON
    #   pi.clinical_item_id = ci.clinical_item_id
    # JOIN
    #   clinical_item_category AS cic
    # ON
    #   ci.clinical_item_category_id = cic.clinical_item_category_id
    # ORDER BY
    #   sequence_id,
    #   event_time,
    #   event_id
    query = SQLQuery()
    query.addSelect('pi.patient_id AS sequence_id')
    query.addSelect('pi.item_date AS event_time')
    query.addSelect('cic.description AS event_id')
    query.addFrom('patient_item AS pi')
    query.addJoin('clinical_item AS ci', 'pi.clinical_item_id = ci.clinical_item_id')
    query.addJoin('clinical_item_category AS cic', 'ci.clinical_item_category_id = cic.clinical_item_category_id')
    query.addOrderBy('sequence_id')
    query.addOrderBy('event_time')
    query.addOrderBy('event_id')
    events = DBUtil.execute(query)

    # Build graph based on clinical_item_category.
    categoryDigraph = EventDigraph(events)
    categoryDigraphVizFileName = "test-category-digraph.png"
    categoryDigraph.draw(categoryDigraphVizFileName)
def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
    """Create a new patient record based on the given template patient ID to copy from.
    Will copy shallow attributes, overridden by any provided in the given patientData,
    as well as any patient states, notes, or physician orders UP TO (and including)
    relative time zero, but not subsequent states, notes, or physician orders
    (the latter are expected to reflect real user interaction records).
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        templatePatientData = DBUtil.loadRecordModelById("sim_patient", templatePatientId, conn=conn)
        del templatePatientData["sim_patient_id"]  # Remove prior ID to allow for new one
        templatePatientData.update(patientData)  # Override with new content (if exists)
        DBUtil.insertRow("sim_patient", templatePatientData, conn=conn)  # Create new patient record
        patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"), conn=conn)[0][0]

        # Copy initial template patient states
        query = SQLQuery()
        query.addSelect("*")  # Copy all columns
        query.addFrom("sim_patient_state as sps")
        query.addWhereEqual("sps.sim_patient_id", templatePatientId)
        query.addWhereOp("relative_time_start", "<=", 0)
        query.addOrderBy("relative_time_start")
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        nStates = len(dataModels)
        for i, dataModel in enumerate(dataModels):
            del dataModel["sim_patient_state_id"]  # Discard copied ID to allow new one
            if i == nStates - 1:
                del dataModel["relative_time_end"]  # Last state. Blank out end time to reflect open ended for simulation
            dataModel["sim_patient_id"] = patientId
            DBUtil.insertRow("sim_patient_state", dataModel, conn=conn)

        # Copy initial template orders
        query = SQLQuery()
        query.addSelect("*")
        query.addFrom("sim_patient_order as spo")
        query.addWhereEqual("sim_patient_id", templatePatientId)
        query.addWhereOp("relative_time_start", "<=", 0)
        query.addOrderBy("relative_time_start")
        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        for dataModel in dataModels:
            del dataModel["sim_patient_order_id"]
            dataModel["sim_patient_id"] = patientId
            DBUtil.insertRow("sim_patient_order", dataModel, conn=conn)

        conn.commit()  # Transactional commit for multi-step process

        return patientId
    finally:
        if not extConn:
            conn.close()
def orderSetSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria"""
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ic.item_collection_id")
        query.addSelect("ic.external_id")
        query.addSelect("ic.name as collection_name")
        query.addSelect("ic.section")
        query.addSelect("ic.subgroup")
        query.addSelect("ci.clinical_item_category_id")
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addFrom("item_collection as ic")
        query.addFrom("item_collection_item as ici")
        query.addFrom("clinical_item as ci")
        query.addWhere("ic.item_collection_id = ici.item_collection_id")
        query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
        query.addWhereNotEqual("ic.section", AD_HOC_SECTION)

        if itemQuery.searchStr is not None:
            searchWords = itemQuery.searchStr.split()
            for searchWord in searchWords:
                # Prefix search by regular expression
                query.addWhereOp("ic.name", "~*", "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord})

        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)

        query.addOrderBy("lower(ic.name)")
        query.addOrderBy("ic.external_id")
        query.addOrderBy("lower(ic.section)")
        query.addOrderBy("lower(ic.subgroup)")
        query.addOrderBy("ci.clinical_item_id")
        query.addOrderBy("ci.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        # Aggregate up into order sets
        orderSetModel = None
        for row in dataModels:
            if orderSetModel is None or row["external_id"] != orderSetModel["external_id"]:
                if orderSetModel is not None:
                    # Prior order set exists, yield/return it before preparing next one
                    yield orderSetModel
                orderSetModel = \
                    {
                        "external_id": row["external_id"],
                        "name": row["collection_name"],
                        "itemList": list(),
                    }
            orderSetModel["itemList"].append(row)
        yield orderSetModel  # Yield the last processed model
    finally:
        if not extConn:
            conn.close()
def loadResults(self, patientId, relativeTime, conn=None):
    """Load all results active by the given relativeTime.
    Will look for sim_patient_state times and sim_patient_order for diagnostic orders,
    to extrapolate all state-specific results for each order,
    or use default values if no state-specific ones are available.
    """
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # First query for all expected result labels and states, without state-specific values, as
        #   may want outer join behavior against default state values
        query = SQLQuery()
        query.addSelect("distinct spo.sim_state_id")
        query.addSelect("sr.sim_result_id")
        query.addSelect("sr.name")
        query.addSelect("sr.description")
        query.addSelect("sr.priority")
        query.addSelect("sr.group_string")
        query.addSelect("spo.relative_time_start")
        query.addSelect("sorm.turnaround_time")
        query.addSelect("(spo.relative_time_start + sorm.turnaround_time) as result_relative_time")
        query.addFrom("sim_patient_order as spo")
        query.addFrom("sim_order_result_map as sorm")
        query.addFrom("sim_result as sr")
        query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
        query.addWhere("sorm.sim_result_id = sr.sim_result_id")
        query.addWhereEqual("spo.sim_patient_id", patientId)
        # Only unlock results if appropriate prereq orders were placed in the past (and longer than the turnaround time)
        query.addWhereOp("spo.relative_time_start + sorm.turnaround_time", "<=", relativeTime)
        query.addOrderBy("result_relative_time")
        query.addOrderBy("sr.priority")
        resultTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Pass through results to get set of states to search for
        stateIds = set([DEFAULT_STATE_ID])  # Include default state to fall back on
        for resultModel in resultModels:
            stateIds.add(resultModel["sim_state_id"])

        # Second query for state-specific values
        valueQuery = SQLQuery()
        valueQuery.addSelect("ssr.sim_state_id")
        valueQuery.addSelect("ssr.sim_result_id")
        valueQuery.addSelect("ssr.num_value")
        valueQuery.addSelect("ssr.num_value_noise")
        valueQuery.addSelect("ssr.text_value")
        valueQuery.addSelect("ssr.result_flag")
        valueQuery.addSelect("ssr.clinical_item_id")  # Output clinical item if result flag means something
        valueQuery.addFrom("sim_state_result as ssr")
        valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
        valueTable = DBUtil.execute(valueQuery, includeColumnNames=True, conn=conn)
        valueModels = modelListFromTable(valueTable)

        # Store in-memory dictionary for rapid cross-referencing "join" to result table
        valueModelByStateIdByResultId = dict()
        for valueModel in valueModels:
            resultId = valueModel["sim_result_id"]
            stateId = valueModel["sim_state_id"]
            if resultId not in valueModelByStateIdByResultId:
                valueModelByStateIdByResultId[resultId] = dict()
            valueModelByStateIdByResultId[resultId][stateId] = valueModel

        # Now go back through original results and join up state-specific values, or use default values if needed
        resultValueModels = list()
        for resultModel in resultModels:
            resultId = resultModel["sim_result_id"]
            stateId = resultModel["sim_state_id"]
            if resultId in valueModelByStateIdByResultId:
                valueModelByStateId = valueModelByStateIdByResultId[resultId]
                if stateId in valueModelByStateId:
                    # Have a state-specific value, populate that
                    valueModel = valueModelByStateId[stateId]
                    resultModel.update(valueModel)
                elif DEFAULT_STATE_ID in valueModelByStateId:
                    # No state-specific value, but have a default one to populate instead
                    valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                    resultModel.update(valueModel)
                resultValueModels.append(resultModel)
            else:
                # No result information available, even in default state. Skip these
                #resultModel["num_value"] = None;
                #resultModel["num_value_noise"] = None;
                #resultModel["text_value"] = None;
                #resultModel["result_flag"] = None;
                #resultModel["clinical_item_id"] = None;
                pass
        return resultValueModels
    finally:
        if not extConn:
            conn.close()
def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
    """Load basic information about the specified patients.
    Report patient state at the given time, or default to time zero.
    """
    if relativeTime is None:
        relativeTime = 0  # Just look for time zero default then

    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("sp.sim_patient_id")
        query.addSelect("sp.name")
        query.addSelect("sp.age_years")
        query.addSelect("sp.gender")
        query.addSelect("s.sim_state_id")
        query.addSelect("s.name as state_name")
        query.addSelect("s.description as state_description")
        query.addSelect("sps.relative_time_start")
        query.addSelect("sps.relative_time_end")
        query.addFrom("sim_patient as sp")
        query.addFrom("sim_patient_state as sps")
        query.addFrom("sim_state as s")
        query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
        query.addWhere("sps.sim_state_id = s.sim_state_id")

        if patientIds is not None:
            query.addWhereIn("sp.sim_patient_id", patientIds)

        # Look for the state that matches the given relative time offset
        query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
        query.openWhereOrClause()
        query.addWhere("sps.relative_time_end is null")
        query.addWhereOp("sps.relative_time_end", ">", relativeTime)
        query.closeWhereOrClause()

        query.addOrderBy("sp.name")

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)

        if len(dataModels) > 0:
            # Secondary query to build lookup table of possible state transition options from patient current states
            subQuery = SQLQuery()
            subQuery.addSelect("pre_state_id")
            subQuery.addSelect("post_state_id")
            subQuery.addSelect("clinical_item_id")
            subQuery.addSelect("time_trigger")
            subQuery.addFrom("sim_state_transition as sst")
            subQuery.addWhereIn("pre_state_id", columnFromModelList(dataModels, "sim_state_id"))
            subResults = DBUtil.execute(subQuery, conn=conn)

            # For each pre-state, track which clinical items or times trigger which post-states
            postStateIdByItemIdByPreStateId = dict()
            postStateIdTimeTriggerByPreStateId = dict()
            for preStateId, postStateId, itemId, timeTrigger in subResults:
                if preStateId not in postStateIdByItemIdByPreStateId:
                    postStateIdByItemIdByPreStateId[preStateId] = dict()
                postStateIdByItemIdByPreStateId[preStateId][itemId] = postStateId
                if timeTrigger is not None:
                    postStateIdTimeTriggerByPreStateId[preStateId] = (postStateId, timeTrigger)

            # Record in patient result models for retrieval
            for i, dataModel in enumerate(dataModels):
                patientId = dataModel["sim_patient_id"]
                stateId = dataModel["sim_state_id"]

                dataModel["postStateIdByItemId"] = dict()
                if stateId in postStateIdByItemIdByPreStateId:
                    dataModel["postStateIdByItemId"] = postStateIdByItemIdByPreStateId[stateId]
                dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                if stateId in postStateIdTimeTriggerByPreStateId:
                    dataModel["postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[stateId]

                if dataModel["relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                    # Check that we haven't passed (and should thus trigger) a time-based state transition
                    (postStateId, timeTrigger) = dataModel["postStateIdTimeTrigger"]
                    preStateTime = dataModel["relative_time_start"]
                    postStateTriggerTime = (preStateTime + timeTrigger)
                    if postStateTriggerTime <= relativeTime:
                        # Trigger state transition just by time elapsed
                        #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                        self.recordStateTransition(patientId, stateId, postStateId, postStateTriggerTime, conn=conn)
                        # State change which can yield new triggers, so recursively reload.
                        # Small risk of infinite recursion if timeTriggers are zero.
                        # Otherwise, should converge as each recursion will update the preState relativeTimeStart
                        dataModels[i] = self.loadPatientInfo([patientId], relativeTime, conn=conn)[0]

        return dataModels
    finally:
        if not extConn:
            conn.close()
DATA_QUERY.addSelect("patient_id") DATA_QUERY.addSelect("clinical_item_category_id") DATA_QUERY.addSelect('name') DATA_QUERY.addSelect("description") # Join DATA_QUERY.addFrom("patient_item") DATA_QUERY.addJoin( "clinical_item", "patient_item.clinical_item_id = clinical_item.clinical_item_id", joinType="INNER") DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Med Univ (Primary)") # Everyone #DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Pamf Med (Primary)") # Expert DATA_QUERY.addOrderBy("patient_id", dir="ASC") print(DATA_QUERY) # Write out data to CSV DBUtil.runDBScript(SCRIPT_FILE, False) results = DBUtil.execute(DATA_QUERY) unique_patient_ids = {} # output = open("/Users/jwang/Desktop/expert.csv", "w") # outlist = open("/Users/jwang/Desktop/expert_list.csv", "w") output = open("/Users/jwang/Desktop/everyone.csv", "w") #includes experts + trainees who are providing patient care outlist = open("/Users/jwang/Desktop/everyone_list.csv", "w")
def _query_patient_episodes(self):
    log.info('Querying patient episodes...')

    # Initialize DB cursor.
    cursor = self._connection.cursor()

    # Build parameters for query.
    self._lab_components = self._get_components_in_lab_panel()
    random_patient_list = self._get_random_patient_list()

    # Build SQL query for list of patient episodes.
    # Note that for 2008-2014 data, result_flag can take on any of the
    # following values: High, Low, High Panic, Low Panic,
    # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
    #   (NONE): Only 27 lab components can have this flag. None has this
    #       value for more than 5 results, so ignore it.
    #   *: Only 10 lab components can have this flag. Only 6 have it for
    #       >10 tests, and each of them is a microbiology test for which
    #       a flag is less meaningful, e.g. Gram Stain, AFB culture.
    #   Susceptible: Only 15 lab components can have this flag. All of those
    #       only have this value for 2 results, so ignore it.
    #   Resistant: Only 1 lab component can have this flag. Only two results
    #       have this value, so ignore it.
    #   Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
    #       have this value for thousands of results, so include it.
    #   Negative: Only 10 lab components can have this flag, and all for
    #       less than 5 results, so ignore it.
    #   Positive: Only 3 lab components can have this flag, and all for
    #       only 1 result, so ignore it.
    #   Low Off-Scale: Only 1 lab component can have this flag, and only for
    #       3 results, so ignore it.
    #   Low Panic: 1401 lab components can have this flag, many core
    #       metabolic components. Include it.
    #   High Panic: 8084 lab components can have this flag, many core
    #       metabolic components. Include it.
    if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':
        query = SQLQuery()

        '''
        pat_id: hashed patient id
        '''
        query.addSelect('CAST(pat_id AS BIGINT) as pat_id')

        '''
        order_proc_id: unique identifier for an episode
        '''
        query.addSelect('sop.order_proc_id')

        '''
        self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried
        '''
        query.addSelect(self._varTypeInTable)

        '''
        order_time: The time of the order. Note that the sor table does not have this info.
        '''
        query.addSelect('order_time')

        '''
        y-label related columns, choose one to predict (for now, use all_components_normal to predict).
        '''
        if self._isLabPanel:
            query.addSelect("CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel")  #
            query.addSelect("SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components")  # sx
            query.addSelect("SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components")  # sx
            query.addSelect("CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal")  # sx
        else:
            query.addSelect("CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal")

        '''
        Relevant tables. Note that the sor table does not have patient_id info; need to join sop to obtain it.
        '''
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')

        '''
        Condition: self._time_limit[0] < order_time < self._time_limit[1]
        '''
        if self._time_limit:
            if self._time_limit[0]:
                query.addWhere("sop.order_time > '%s'" % self._time_limit[0])
            if self._time_limit[1]:
                query.addWhere("sop.order_time < '%s'" % self._time_limit[1])

        query.addWhere("(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)")
        query.addWhereIn(self._varTypeInTable, [self._lab_var])  # sx
        query.addWhereIn("pat_id", random_patient_list)

        query.addGroupBy('pat_id')
        query.addGroupBy('sop.order_proc_id')
        query.addGroupBy(self._varTypeInTable)
        query.addGroupBy('order_time')
        if not self._isLabPanel:
            query.addGroupBy('result_flag')
        query.addGroupBy('abnormal_yn')  #

        query.addOrderBy('pat_id')
        query.addOrderBy('sop.order_proc_id')
        query.addOrderBy(self._varTypeInTable)
        query.addOrderBy('order_time')

        query.setLimit(self._num_requested_episodes)

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query, index_time_col='order_time')
    else:
        '''
        Sqlite3 has an interesting limit on the total number of placeholders in a query,
        and this limit varies across platforms/operating systems (500-99999 on mac, 999 by default).
        To avoid this problem when querying 1000-10000 patient ids,
        use string queries instead of the default (convenient) routine in DBUtil.
        '''
        query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
        if not self._isLabPanel:
            query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
        else:
            query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
        query_str += "FROM labs "
        query_str += "WHERE %s = '%s' " % (self._varTypeInTable, self._lab_var)
        query_str += "AND pat_id IN "

        pat_list_str = "("
        for pat_id in random_patient_list:
            pat_list_str += str(pat_id) + ","
        pat_list_str = pat_list_str[:-1] + ") "
        query_str += pat_list_str

        query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
        query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
        query_str += "LIMIT %d" % self._num_requested_episodes

        self._num_reported_episodes = FeatureMatrix._query_patient_episodes(self, query_str, index_time_col='order_time')
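# Minimal alternative sketch for assembling the IN-list above (same string-interpolation
# idea as the original loop, just via join; assumes random_patient_list is non-empty):
random_patient_list = [101, 102, 103]  # hypothetical patient IDs
pat_list_str = "(" + ",".join(str(pat_id) for pat_id in random_patient_list) + ") "
assert pat_list_str == "(101,102,103) "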