def _getAdmitDateRange(self):
    """Return (first, last) admission dates over all admit-diagnosis patient items.

    Looks up the admit-diagnosis clinical item IDs first, then pulls the
    MIN/MAX item_date from patient_item in a single aggregate query.
    """
    # Fetch the admit diagnosis item IDs up front; filtering the aggregate
    # query on a pre-computed ID list makes it run a bit faster.
    admitDxIds = self._getAdmitDxClinicalItemIds()

    # SELECT MIN(item_date), MAX(item_date) FROM patient_item
    #   WHERE clinical_item_id IN (admitDxIds)
    rangeQuery = SQLQuery()
    rangeQuery.addSelect("MIN(item_date) AS first_admit_date")
    rangeQuery.addSelect("MAX(item_date) AS last_admit_date")
    rangeQuery.addFrom("patient_item")
    rangeQuery.addWhereIn("clinical_item_id", admitDxIds)

    rows = self._executeCachedDBQuery(rangeQuery)
    earliest = DBUtil.parseDateValue(rows[0][0]).date()
    latest = DBUtil.parseDateValue(rows[0][1]).date()
    return earliest, latest
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    delaySeconds = float(options.pauseSeconds)

    medQuery = SQLQuery()
    medQuery.addSelect("count(order_med_id_coded) as nOrders")
    medQuery.addSelect("om.med_route, om.medication_id, om.med_description")
    medQuery.addFrom("starr_datalake2018.order_med as om")
    if options.descriptionPrefix:
        # Trailing wildcard enables prefix search on the description
        medQuery.addWhereOp("om.med_description", "like", options.descriptionPrefix + "%%")
    if options.medRoutes:
        medQuery.addWhereIn("om.med_route", options.medRoutes.split(","))
    medQuery.addGroupBy("om.medication_id, om.med_description, om.med_route")
    medQuery.addOrderBy("nOrders desc, om.med_description")

    formatter = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    for row in DBUtil.execute(medQuery, includeColumnNames=True, connFactory=self.connFactory):
        formatter.formatTuple(row)
        time.sleep(delaySeconds)  # Throttle output per options
        progress.update()
    progress.printStatus()
def _get_random_patient_list(self):
    """Return a random list of patient IDs who have had the target lab panel ordered.

    Sizes the sample so that, given the average number of orders per patient,
    roughly self._num_requested_episodes episodes will be captured.
    Side effect: sets self._num_patients to the computed sample size.

    Fix: removed a DB cursor that was opened but never used or closed
    (resource leak); replaced numpy.max over a 2-element list with the
    builtin max, which is equivalent for scalars.
    """
    # Get average number of results for this lab test per patient.
    avg_orders_per_patient = self._get_average_orders_per_patient()
    log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

    # Based on average # of results, figure out how many patients we'd
    # need for a feature matrix of the requested size (at least 1 patient).
    self._num_patients = int(max(
        self._num_requested_episodes / avg_orders_per_patient, 1))

    # Get self._num_patients random patients who have gotten the test.
    # TODO(sbala): Have option to feed in a seed for the randomness.
    query = SQLQuery()
    query.addSelect('pat_id')
    query.addFrom('stride_order_proc AS sop')
    query.addWhereIn('proc_code', [self._lab_panel])
    query.addOrderBy('RANDOM()')
    query.setLimit(self._num_patients)
    log.debug('Querying random patient list...')
    results = DBUtil.execute(query)

    # Get patient list.
    return [row[0] for row in results]
def _get_components_in_lab_panel(self):
    """Return the result component base_names belonging to self._lab_panel.

    A single joined query results in a sequential scan through
    stride_order_results; to avoid this, the lookup is split in two:
    first collect order_proc_ids for the proc_code, then fetch the
    base_names attached to those orders.

    Fix: removed a DB cursor that was opened but never used or closed
    (resource leak).
    """
    # First, get all the order_proc_ids for proc_code.
    query = SQLQuery()
    query.addSelect('order_proc_id')
    query.addFrom('stride_order_proc')
    query.addWhereIn('proc_code', [self._lab_panel])
    query.addGroupBy('order_proc_id')
    log.debug('Querying order_proc_ids for %s...' % self._lab_panel)
    results = DBUtil.execute(query)
    lab_order_ids = [row[0] for row in results]

    # Second, get all base_names from those orders.
    query = SQLQuery()
    query.addSelect('base_name')
    query.addFrom('stride_order_results')
    query.addWhereIn('order_proc_id', lab_order_ids)
    query.addGroupBy('base_name')
    log.debug('Querying base_names for order_proc_ids...')
    results = DBUtil.execute(query)
    components = [row[0] for row in results]

    return components
def queryLabResults(outputFile, patientById):
    """Dump lab results for the given patients to outputFile as tab-delimited rows."""
    log.info("Query out lab results, takes a while")

    # Lab base names of interest (anemia / inflammation workup panels)
    labBaseNames = \
        ('ferritin', 'fe', 'trfrn', 'trfsat', 'ystfrr',
         'wbc', 'hgb', 'hct', 'mcv', 'rdw', 'plt',
         'retic', 'reticab', 'ldh', 'hapto', 'tbil', 'ibil', 'dbil',
         'cr', 'esr', 'crp')

    formatter = TextResultsFormatter(outputFile)

    # Query rapid when filter by lab result type, limited to X records.
    # Filtering by patient ID drags down substantially until preloaded table by doing a count on the SOR table?
    colNames = ["pat_id", "base_name", "common_name", "ord_num_value",
                "reference_unit", "result_flag", "sor.result_time"]

    labQuery = SQLQuery()
    for name in colNames:
        labQuery.addSelect(name)
    labQuery.addFrom("stride_order_results as sor, stride_order_proc as sop")
    labQuery.addWhere("sor.order_proc_id = sop.order_proc_id")
    labQuery.addWhereIn("base_name", labBaseNames)
    labQuery.addWhereIn("pat_id", patientById.viewkeys())
    labQuery.addOrderBy("pat_id")
    labQuery.addOrderBy("sor.result_time")

    # Stream results straight through the formatter to the output file
    DBUtil.execute(labQuery, includeColumnNames=True, formatter=formatter)
def loadClinicalItemBaseCountByItemId(self, countPrefix=None, acceptCache=True, conn=None):
    """Helper query to get the baseline analyzed item counts for all of the clinical items

    If countPrefix is provided, can use alternative total item counts
    instead of the default item_count, such as patient_count or encounter_count
    to match the respective association query baselines used.

    Returns a dict mapping clinical_item_id -> baseline count.
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = True;
    if conn is None:
        conn = self.connFactory.connection();
        extConn = False;
    try:
        # First ensure the denormalized count data is updated
        self.updateClinicalItemCounts(acceptCache=acceptCache, conn=conn);

        if countPrefix is None or countPrefix == "":
            countPrefix = "item_";  # Default to general item counts, allowing for repeats per patient

        baseCountQuery = SQLQuery();
        baseCountQuery.addSelect("clinical_item_id");
        # Selected column becomes e.g. "item_count" / "patient_count" / "encounter_count"
        baseCountQuery.addSelect("%scount" % countPrefix);
        baseCountQuery.addFrom("clinical_item");
        if acceptCache:
            # Allow a cached copy of the result table if available
            baseCountResultTable = self.executeCacheOption( baseCountQuery, conn=conn );
        else:
            baseCountResultTable = DBUtil.execute( baseCountQuery, conn=conn );

        baseCountByItemId = dict();
        for (itemId, baseCount) in baseCountResultTable:
            baseCountByItemId[itemId] = baseCount;
        return baseCountByItemId;
    finally:
        if not extConn:
            conn.close();
def queryOutpatientIronRx(outputFile, patientById):
    """Write outpatient (enteral) iron prescription records for the given
    patients to outputFile as tab-delimited (pat_id, ordering_date) rows.
    """
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient
    poIronIngredientMedicationIds = (3065, 3066, 3067, 3071, 3074, 3077, 3986,
                                     7292, 11050, 25006, 26797, 34528, 39676,
                                     78552, 79674, 83568, 84170, 85151, 96118,
                                     112120, 112395, 113213, 126035, 198511,
                                     200455, 201994, 201995, 203679, 207059,
                                     207404, 208037, 208072)
    # Medication IDs directly from prescriptions, formulations that did not map through RxNorm
    poIronDirectMedicationIds = (111354, 540526, 205010, 121171, 111320, 82791,
                                 93962, 201795, 206722, 201068, 116045, 208725,
                                 111341, 206637, 112400, 210256, 77529, 20844,
                                 83798, 205523, 112428, 125474, 111343)
    allEnteralIronMedicationIds = set(poIronIngredientMedicationIds)
    allEnteralIronMedicationIds.update(poIronDirectMedicationIds)

    formatter = TextResultsFormatter(outputFile)

    rxQuery = SQLQuery()
    for columnName in ["pat_id", "ordering_date"]:
        rxQuery.addSelect(columnName)
    rxQuery.addFrom("stride_order_med")
    rxQuery.addWhereIn("medication_id", allEnteralIronMedicationIds)
    rxQuery.addWhereIn("pat_id", patientById.viewkeys())
    rxQuery.addOrderBy("pat_id")
    rxQuery.addOrderBy("ordering_date")

    # Stream results straight through the formatter to the output file
    DBUtil.execute(rxQuery, includeColumnNames=True, formatter=formatter)
def _insertUMichTestRecords(self):
    """Load UMich-format fixture tables into the test SQLite DB, then
    initialize and run the factory's patient-episode input from the labs table.
    """
    db_name = medinfo.db.Env.DB_PARAM['DSN']
    db_path = medinfo.db.Env.DB_PARAM['DATAPATH']
    conn = sqlite3.connect(db_path + '/' + db_name)

    table_names = ['labs', 'pt_info', 'demographics', 'encounters', 'diagnoses']
    for table_name in table_names:
        # Fixture data is stored as parallel "<table>_columns" / "<table>_column_types" /
        # "<table>_data" entries in FM_TEST_INPUT_TABLES.
        columns = FMTU.FM_TEST_INPUT_TABLES["%s_columns" % table_name]
        # NOTE(review): column_types is currently unused — confirm whether
        # explicit type coercion for to_sql was intended here.
        column_types = FMTU.FM_TEST_INPUT_TABLES["%s_column_types" % table_name]
        df = pd.DataFrame()
        for one_line in FMTU.FM_TEST_INPUT_TABLES['%s_data' % table_name]:
            df = df.append(dict(zip(columns, one_line)), ignore_index=True)
        df.to_sql(table_name, conn, if_exists="append", index=False)

    # First, write basic (pat_id, order_time) episode information to TempFile
    # Then, all [[pat_id, event_time]] operations are based on these episodes
    # i.e., pat_id are all from these pat_ids
    patientEpisodeQuery = SQLQuery()
    patientEpisodeQuery.addSelect("CAST(pat_id AS INTEGER) AS pat_id")
    patientEpisodeQuery.addSelect("order_time")
    patientEpisodeQuery.addFrom("labs")
    self.cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    self.factory.setPatientEpisodeInput(self.cursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()

    resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
    resultPatientEpisodes = list()
    for episode in resultEpisodeIterator:
        # Normalize types so episode values compare cleanly downstream
        episode["pat_id"] = int(episode["pat_id"])
        episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
        resultPatientEpisodes.append(episode)
def queryItems(self, options, outputFile):
    """Query for all clinical item records that fulfill the options criteria
    and then send the results as tab-delimited output to the outputFile.
    """
    delaySeconds = float(options.pauseSeconds)

    itemQuery = SQLQuery()
    itemQuery.addSelect("cic.description, ci.clinical_item_id, ci.name, ci.description")
    itemQuery.addFrom("clinical_item_category as cic")
    itemQuery.addFrom("clinical_item as ci")
    itemQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id")
    if options.itemPrefix:
        # Trailing wildcard enables prefix search on the item description
        itemQuery.addWhereOp("ci.description", "like", options.itemPrefix + "%%")
    if options.categoryNames:
        itemQuery.addWhereIn("cic.description", options.categoryNames.split(","))
    itemQuery.addOrderBy("cic.description, ci.name, ci.description, ci.clinical_item_id")

    formatter = TextResultsFormatter(outputFile)
    progress = ProgressDots()
    for row in DBUtil.execute(itemQuery, includeColumnNames=True, connFactory=self.connFactory):
        formatter.formatTuple(row)
        time.sleep(delaySeconds)  # Throttle output per options
        progress.update()
    progress.printStatus()
def tearDown(self):
    """Restore state from any setUp or test steps"""
    log.info("Purge test records from the database")
    # Delete in dependency order: patient_item rows first (they reference
    # clinical items tied to the test source table)...
    DBUtil.execute \
    (   """delete from patient_item
        where clinical_item_id in
        (   select clinical_item_id
            from clinical_item as ci, clinical_item_category as cic
            where ci.clinical_item_category_id = cic.clinical_item_category_id
            and cic.source_table = '%s'
        );
        """ % TEST_SOURCE_TABLE
    )
    # ...then the clinical_item rows under test categories...
    DBUtil.execute \
    (   """delete from clinical_item
        where clinical_item_category_id in
        (   select clinical_item_category_id
            from clinical_item_category
            where source_table = '%s'
        );
        """ % TEST_SOURCE_TABLE
    )
    # ...then the test categories themselves.
    DBUtil.execute("delete from clinical_item_category where source_table = '%s';" % TEST_SOURCE_TABLE)

    # Finally remove (negative-ID) test patients via a delete-style SQLQuery.
    query = SQLQuery()
    query.delete = True
    query.addFrom("stride_patient")
    query.addWhere("pat_id < 0")
    DBUtil.execute(query)

    DBTestCase.tearDown(self)
def loadPatientLastEventTime(self, patientId, conn=None):
    """Find the last simulated time where the patient received any user orders
    (or default to time 0). Makes for natural starting point for resuming a simulation.
    Note: Misses cases where patient state also changed, not triggered by order, but by just time lag.
    """
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()
    try:
        timeQuery = SQLQuery()
        timeQuery.addSelect("max(relative_time_start)")
        timeQuery.addSelect("max(relative_time_end)")
        timeQuery.addSelect("0")  # Fallback so patients without orders yield time 0
        timeQuery.addFrom("sim_patient_order as po")
        timeQuery.addWhereEqual("sim_patient_id", patientId)

        resultRow = DBUtil.execute(timeQuery, conn=conn)[0]
        # Largest non-null time across order starts, order ends, and the 0 default
        return max(col for col in resultRow if col is not None)
    finally:
        if not extConn:
            conn.close()
def querySourceItems(self, convOptions, progress=None, conn=None):
    """Query the database for list of all source clinical items (culture
    results, etc.) and yield the results one at a time.
    If startDate provided, only return items whose occurence date is on or after that date.
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    headers = ["order_proc_anon_id","pat_anon_id","pat_enc_csn_anon_id","proc_code","organism_name","antibiotic_name","suseptibility", "shifted_result_time"];
    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_culture_micro");

    # TODO: FIGURE OUT WHY CAN"T DO >= OPERATION HERE
    # if convOptions.startDate is not None:
    #     query.addWhereOp("shifted_result_time",">=", convOptions.startDate);
    # if convOptions.endDate is not None:
    #     query.addWhereOp("shifted_result_time","<", convOptions.endDate);
    # Still use begin date as common filter value

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );

        if rowModel['shifted_result_time'] is None:
            # Don't add if no result time given
            row = cursor.fetchone();
            continue

        if rowModel['organism_name'] is not None:
            # If positive culture but results uninterpretable, don't add feature
            if rowModel['suseptibility'] is None or rowModel['antibiotic_name'] == 'Method' or rowModel['antibiotic_name'] is None:
                row = cursor.fetchone();
                continue

        # So that we don't run into directory issues later when writing temp files
        try:
            rowModel['antibiotic_name'] = rowModel['antibiotic_name'].replace('/', '-')
        except:
            # When antibiotic name is None there is nothing to sanitize
            pass

        yield rowModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();
def generatePatientItemsForCompositeId(self, clinicalItemIds, compositeId, conn=None):
    """Create patient_item records for the composite to match the given
    clinical item ID patient items.
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Record linking information
        for componentId in clinicalItemIds:
            linkModel = RowItemModel()
            linkModel["clinical_item_id"] = compositeId
            linkModel["linked_item_id"] = componentId

            insertQuery = DBUtil.buildInsertQuery("clinical_item_link", linkModel.keys())
            insertParams = linkModel.values()
            DBUtil.execute(insertQuery, insertParams, conn=conn)

        # Extract back link information, which will also flatten out any potential inherited links
        linkedItemIdsByBaseId = self.loadLinkedItemIdsByBaseId(conn=conn)
        linkedItemIds = linkedItemIdsByBaseId[compositeId]

        # Create patienItem records for the composite clinical item to overlap existing component ones
        # First query for the existing component records
        query = SQLQuery()
        query.addSelect("*")
        query.addFrom("patient_item")
        query.addWhereIn("clinical_item_id", linkedItemIds)
        results = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        patientItems = modelListFromTable(results)

        # Patch component records to instead become composite item records then insert back into database
        progress = ProgressDots(total=len(patientItems))
        for patientItem in patientItems:
            del patientItem["patient_item_id"]  # Drop the PK so the insert gets a fresh one
            patientItem["clinical_item_id"] = compositeId
            patientItem["analyze_date"] = None  # Reset so the new composite records are picked up for analysis

            insertQuery = DBUtil.buildInsertQuery("patient_item", patientItem.keys())
            insertParams = patientItem.values()
            try:
                # Optimistic insert of a new unique item
                DBUtil.execute(insertQuery, insertParams, conn=conn)
            except conn.IntegrityError, err:
                # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                log.info(err)
            # NOTE(review): capitalized Update() differs from ProgressDots.update()
            # used elsewhere in this file — confirm this object's API.
            progress.Update()
        # progress.PrintStatus();
    finally:
        if not extConn:
            conn.close()
def test_copyPatientTemplate(self):
    """Copy a patient template, including deep copy of notes, orders, states,
    but only up to relative time zero."""
    newPatientData = { "name": "Template Copy" }
    templatePatientId = -1  # Fixture template patient
    self.testPatientId = self.manager.copyPatientTemplate( newPatientData, templatePatientId)
    futureTime = 1000000  # Far future time to test that we still only copied the results up to time zero

    # Verify basic patient information
    patientCols = ["name", "age_years", "gender", "sim_state_id"]
    patientModel = self.manager.loadPatientInfo([self.testPatientId])[0]
    expectedPatientModel = RowItemModel( ["Template Copy", 60, "Female", -1], patientCols)
    self.assertEqualDict(expectedPatientModel, patientModel, patientCols)

    # Verify notes
    dataCols = ["sim_patient_id", "content"]
    sampleData = self.manager.loadNotes(self.testPatientId, futureTime)
    verifyData = \
        [ RowItemModel([self.testPatientId,"Initial Note"], dataCols),
          RowItemModel([self.testPatientId,"Initial Note"], dataCols),  # Second copy because another state initiation at time zero and negative onset time
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)

    # Verify orders
    dataCols = [ "sim_user_id", "sim_patient_id", "sim_state_id", "clinical_item_id", "relative_time_start", "relative_time_end" ]
    sampleData = self.manager.loadPatientOrders(self.testPatientId, futureTime, loadActive=None)
    verifyData = \
        [ RowItemModel([-1,self.testPatientId,-1,-15,0,None], dataCols),
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)

    # Verify states: query sim_patient_state directly, ordered by start time
    dataCols = [ "sim_patient_id", "sim_state_id", "relative_time_start", "relative_time_end" ]
    query = SQLQuery()
    for dataCol in dataCols:
        query.addSelect(dataCol)
    query.addFrom("sim_patient_state")
    query.addWhereEqual("sim_patient_id", self.testPatientId)
    query.addOrderBy("relative_time_start")
    sampleDataTable = DBUtil.execute(query, includeColumnNames=True)
    sampleData = modelListFromTable(sampleDataTable)
    verifyData = \
        [ RowItemModel([self.testPatientId,-1,-7200,0], dataCols),
          RowItemModel([self.testPatientId,-1,0,None], dataCols),
        ]
    self.assertEqualDictList(verifyData, sampleData, dataCols)
def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
    """Query the database for list of all source clinical items (lab results
    in this case) and yield the results one at a time.
    If startDate provided, only return items whose result_time is on or after that date.

    Only include results records where the result_flag is set to an informative value,
    to focus only on abnormal lab results (including would be a ton more relatively
    uninformative data that would greatly expend data space and subsequent
    computation time)
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = conn is not None;
    if not extConn:
        conn = self.connFactory.connection();

    # Column headers to query for that map to respective fields in analysis table
    headers = ["sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type", "proc_id", "proc_code", "base_name", "component_name", "common_name", "ord_num_value", "result_flag", "result_in_range_yn", "sor.result_time"];
    query = SQLQuery();
    for header in headers:
        query.addSelect( header );
    query.addFrom("stride_order_proc as sop");
    query.addFrom("%s as sor" % SOURCE_TABLE);
    query.addWhere("sop.order_proc_id = sor.order_proc_id");
    #query.addWhere("result_flag <> '*'");   # Will exclude nulls and the uninformative '*' values for text-based microbiology results
    if startDate is not None:
        query.addWhereOp("sor.result_time",">=", startDate);
    if endDate is not None:
        query.addWhereOp("sor.result_time","<", endDate);

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

    cursor = conn.cursor();
    # Do one massive query, but yield data for one item at a time.
    cursor.execute( str(query), tuple(query.params) );

    row = cursor.fetchone();
    while row is not None:
        rowModel = RowItemModel( row, headers );
        # Copy table-qualified column labels to bare field names expected downstream
        rowModel["order_proc_id"] = rowModel["sor.order_proc_id"];
        rowModel["result_time"] = rowModel["sor.result_time"];

        if rowModel['base_name'] is None:
            # Skip records with no base_name to key results by
            row = cursor.fetchone()
            continue

        self.populateResultFlag(rowModel,conn=conn);

        yield rowModel;  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone();

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close();
    if not extConn:
        conn.close();
def updateClinicalItemCounts(self, acceptCache=False, conn=None):
    """Update the summary item_counts for clinical_items based on
    clinical_item_association summary counts.

    If acceptCache is True, then will first check for existence of an entry
    "clinicalItemCountsUpdated" in the data_cache table. If it exists, assume
    we have done this update already, and no need to force the calculations again
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        if acceptCache:
            isCacheUpdated = (self.getCacheData("clinicalItemCountsUpdated", conn=conn) is not None)
            if isCacheUpdated:
                # Item count caches already updated, no need to recalculate them
                return

        # First reset all counts to zero
        query = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
        params = []
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            query += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
            params.append(self.maxClinicalItemId)
        DBUtil.execute(query, params, conn=conn)

        # Pull per-item summary stats from the self-association records
        sqlQuery = SQLQuery()
        sqlQuery.addSelect("clinical_item_id")
        sqlQuery.addSelect("count_0 as item_count")
        sqlQuery.addSelect("patient_count_0 as patient_count")
        sqlQuery.addSelect("encounter_count_0 as encounter_count")
        sqlQuery.addFrom("clinical_item_association as ci")
        sqlQuery.addWhere("clinical_item_id = subsequent_item_id")  # Look along "diagonal" of matrix for primary summary stats
        if self.maxClinicalItemId is not None:  # Restrict to (test) data
            sqlQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)

        resultTable = DBUtil.execute(sqlQuery, includeColumnNames=True, conn=conn)
        resultModels = modelListFromTable(resultTable)

        # Write each item's counts back onto its clinical_item row
        for result in resultModels:
            DBUtil.updateRow("clinical_item", result, result["clinical_item_id"], conn=conn)

        # Make a note that this cache data has been updated
        self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
    finally:
        if not extConn:
            conn.close()
def _getNumPatients():
    """Count distinct patients that have an admit-diagnosis patient_item record.

    Fix: the previous version called query.addSelect() with no argument
    (a TypeError at runtime) and then overwrote the SQLQuery object with an
    unfinished SQL string and printed it, so it could never return a result.
    This version builds a complete count query and returns the count.
    """
    ADMIT_DX_CATEGORY_ID = 2  # clinical_item_category for admit diagnoses

    query = SQLQuery()
    query.addSelect("COUNT(DISTINCT patient_id) AS num_patients")
    query.addFrom("patient_item")
    # Restrict to items belonging to the admit-diagnosis category
    query.addWhere(
        "clinical_item_id IN"
        " (SELECT clinical_item_id FROM clinical_item"
        " WHERE clinical_item_category_id = %d)" % ADMIT_DX_CATEGORY_ID)

    results = DBUtil.execute(query)
    return results[0][0]
def test_buildFeatureMatrix_prePostFeatures(self):
    """
    Test features parameter in addClinicalItemFeatures which allows
    client to specify they only want .pre* or .post* columns in
    feature matrix.
    """
    # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
    # has not been set.
    with self.assertRaises(ValueError):
        self.factory.processPatientEpisodeInput()

    # Initialize DB cursor.
    cursor = self.connection.cursor()

    # Build SQL query for list of patient episodes.
    patientEpisodeQuery = SQLQuery()
    patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
    patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
    patientEpisodeQuery.addSelect("proc_code")
    patientEpisodeQuery.addSelect("order_time")
    # Count results flagged in-range ('Y') as normal_results per episode
    patientEpisodeQuery.addSelect("COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results")
    patientEpisodeQuery.addFrom("stride_order_proc AS sop")
    patientEpisodeQuery.addFrom("stride_order_results AS sor")
    patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
    patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
    patientEpisodeQuery.addGroupBy("pat_id, sop.order_proc_id, proc_code, order_time")
    patientEpisodeQuery.addOrderBy("pat_id, sop.order_proc_id, proc_code, order_time")
    cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

    # Set and process patientEpisodeInput.
    self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
    self.factory.processPatientEpisodeInput()
    resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
    resultPatientEpisodes = list()
    for episode in resultEpisodeIterator:
        # Normalize types for comparison against expected output
        episode["pat_id"] = int(episode["pat_id"])
        episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
        resultPatientEpisodes.append(episode)

    # Add TestItem100 and TestItem200 clinical item data.
    # features="pre"/"post" restricts generated columns to .pre*/.post* variants.
    self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
    self.factory.addClinicalItemFeatures(["TestItem200"], features="post")
    self.factory.buildFeatureMatrix()
    resultMatrix = self.factory.readFeatureMatrixFile()

    expectedMatrix = FM_TEST_OUTPUT["test_buildFeatureMatrix_prePostFeatures"]

    # Compare from the third line onward, matching the expected fixture's scope
    self.assertEqualList(resultMatrix[2:], expectedMatrix)
def clinicalItemSearch(self, itemQuery, conn=None):
    """Look for clinical items based on specified query criteria

    Uses itemQuery fields: searchStr (per-word prefix regex match on the item
    description), sourceTables, analysisStatus, sortField, resultCount.
    Returns a list of row models with item and category descriptors.
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        query = SQLQuery()
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("cic.source_table")
        query.addSelect("cic.description as category_description")
        query.addFrom("clinical_item as ci")
        query.addFrom("clinical_item_category as cic")
        query.addWhere("ci.clinical_item_category_id = cic.clinical_item_category_id")

        if itemQuery.searchStr is not None:
            searchWords = itemQuery.searchStr.split()
            #query.openWhereOrClause()
            for searchField in ("ci.description", ):
                for searchWord in searchWords:
                    # Case-insensitive regex: word at start of description or
                    # preceded by a non-letter (word-boundary-style prefix match)
                    query.addWhereOp(searchField, "~*", "^%(searchWord)s|[^a-z]%(searchWord)s" % {"searchWord": searchWord })  # Prefix search by regular expression
            #query.closeWhereOrClause()

        if itemQuery.sourceTables:
            query.addWhereIn("cic.source_table", itemQuery.sourceTables)
        if itemQuery.analysisStatus is not None:
            query.addWhereEqual("ci.analysis_status", itemQuery.analysisStatus)
        query.addWhere("ci.item_count <> 0")  # Also ignore items with no occurence in the analyzed data (occurs if item was accepted for analysis from multi-year dataset, but never used in a sub-time frame's analysis)

        if itemQuery.sortField:
            query.addOrderBy(itemQuery.sortField)
        # Secondary sort keys for stable, readable ordering
        query.addOrderBy("cic.description")
        query.addOrderBy("ci.name")
        query.addOrderBy("ci.description")

        if itemQuery.resultCount is not None:
            query.limit = itemQuery.resultCount

        dataTable = DBUtil.execute(query, includeColumnNames=True, conn=conn)
        dataModels = modelListFromTable(dataTable)
        return dataModels
    finally:
        if not extConn:
            conn.close()
def querySourceItems(self, userSIDs, limit=None, offset=None, progress=None, conn=None):
    """Query the database for list of all AccessLogs and yield the results
    one at a time. If userSIDs provided, only return items matching those IDs.
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = conn is not None
    if not extConn:
        conn = self.connFactory.connection()

    # Column headers to query for that map to respective fields in analysis table
    headers = [
        "user_id", "user_name", "de_pat_id", "access_datetime", "metric_id",
        "metric_name", "line_count", "description", "metric_group_num",
        "metric_group_name"
    ]

    query = SQLQuery()
    for header in headers:
        query.addSelect(header)
    query.addFrom(self.sourceTableName)
    if userSIDs is not None:
        query.addWhereIn("user_id", userSIDs)
    # Paging controls (either may be None to disable)
    query.setLimit(limit)
    query.setOffset(offset)

    # Query to get an estimate of how long the process will be
    if progress is not None:
        progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0]

    cursor = conn.cursor()
    # Do one massive query, but yield data for one item at a time.
    cursor.execute(str(query), tuple(query.params))

    row = cursor.fetchone()
    while row is not None:
        rowModel = RowItemModel(row, headers)
        yield rowModel  # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
        row = cursor.fetchone()

    # Slight risk here. Normally DB connection closing should be in finally of a try block,
    # but using the "yield" generator construct forbids us from using a try, finally construct.
    cursor.close()
    if not extConn:
        conn.close()
def loadRespiratoryViralPanelItemIds(extractor):
    """Return the set of active clinical_item_ids under the 'Lab' category
    whose description matches the regex 'Respiratory.*Panel'.
    """
    # labCategoryId = 6;
    labCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Lab'")[0][0]

    panelQuery = SQLQuery()
    panelQuery.addSelect("clinical_item_id")
    panelQuery.addFrom("clinical_item")
    panelQuery.addWhereEqual("analysis_status", 1)
    panelQuery.addWhereIn("clinical_item_category_id", (labCategoryId,))
    panelQuery.addWhere("description ~* '%s'" % 'Respiratory.*Panel')

    return set(row[0] for row in DBUtil.execute(panelQuery))
def deactivateAnalysisByCount(self, thresholdInstanceCount, categoryIds=None, conn=None):
    """Find clinical items to deactivate, based on their instance (patient_item)
    counts being too low to be interesting. Can restrict to applying to only
    items under certain categories.

    Use data/analysis/queryItemCounts.py to help guide selections with queries like:
        select count(clinical_item_id), sum(item_count)
        from clinical_item
        where item_count > %s
        and clinical_item_category_id in (%s)
        (and analysis_status = 1)?
            Seems like good filter, but the process itself will change this count

    Direct search option as below, but that's usually for pre-processing before
    activations even start. Former meant to count records that have already
    gone through analysis.
        select clinical_item_id, count(distinct patient_id), count(distinct encounter_id), count(patient_item_id)
        from patient_item
        group by clinical_item_id
    """
    # Track connection ownership: only close a connection we opened here.
    extConn = True
    if conn is None:
        conn = self.connFactory.connection()
        extConn = False
    try:
        # Make sure clinical item instance (patient item) counts are up to date
        self.updateClinicalItemCounts(conn=conn)

        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addFrom("clinical_item")
        if self.maxClinicalItemId is not None:  # Restrict to limited / test data
            query.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)
        if categoryIds is not None:
            query.addWhereIn("clinical_item_category_id", categoryIds)
        # Items at or below the threshold count are candidates for deactivation
        query.addWhereOp("item_count", "<=", thresholdInstanceCount)
        results = DBUtil.execute(query, conn=conn)

        clinicalItemIds = set()
        for row in results:
            clinicalItemIds.add(row[0])

        self.deactivateAnalysis(clinicalItemIds, conn=conn)
    finally:
        if not extConn:
            conn.close()
def _getClinicalItems(self): query = SQLQuery() # SELECT query.addSelect(CLINICAL_ITEM_ID) query.addSelect('name') # FROM query.addFrom('clinical_item') print query print query.getParams() DBUtil.runDBScript(self.SCRIPT_FILE, False) results = DBUtil.execute(str(query), query.getParams()) pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
def queryDemographics(patientDF, baseDate):
    """Left-merge age/gender/race demographics onto patientDF by patientId."""
    log.info("Populate demographics background for %d patients" % len(patientDF) );

    demoQuery = SQLQuery()
    demoQuery.addSelect("pat_id")
    demoQuery.addSelect("%d-birth_year as age" % baseDate.year)  # Age as of the baseDate year
    demoQuery.addSelect("gender")
    demoQuery.addSelect("primary_race")
    demoQuery.addFrom("stride_patient")
    demoQuery.addWhereIn("pat_id", patientDF["patientId"])

    rows = DBUtil.execute(demoQuery)
    demoDF = pd.DataFrame(rows, columns=["patientId", "age", "gender", "race"])
    return patientDF.merge(demoDF, how="left")
def _getNonNullComponents(self):
    """Write the base_name of each order_result_stat row having a non-null
    max_result_flag to DATA_FOLDER/base_names.csv.
    """
    statQuery = SQLQuery()
    statQuery.addSelect(BASE_NAME)
    # statQuery.addSelect('max_result_flag')
    statQuery.addFrom('order_result_stat')
    statQuery.addWhere('max_result_flag is not null')

    rows = DBUtil.execute(statQuery)
    frame = pd.DataFrame(rows, columns=statQuery.select)
    frame.to_csv(DATA_FOLDER + 'base_names.csv', index=False)
def loadBloodCultureItemIds(extractor):
    """Return the set of active clinical_item_ids in the Microbiology category
    whose description matches 'Blood Culture' — diagnostic tests indicating
    suspected infection / sepsis.
    """
    # microCategoryId = 15;
    microCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Microbiology'")[0][0]

    cultureQuery = SQLQuery()
    cultureQuery.addSelect("clinical_item_id")
    cultureQuery.addFrom("clinical_item")
    cultureQuery.addWhereEqual("analysis_status", 1)
    cultureQuery.addWhereIn("clinical_item_category_id", (microCategoryId,))
    cultureQuery.addWhere("description ~* '%s'" % 'Blood Culture')

    return set(row[0] for row in DBUtil.execute(cultureQuery))
def get_cnt(lab, lab_type, columns):
    """Return rows of the requested columns for orders of `lab` placed
    between 2014-01-01 and 2016-12-31 (inclusive on the string comparison).

    Only lab_type == 'panel' is handled; any other type returns None,
    matching the original behavior.

    Fix: the proc_code filter was previously built by string interpolation
    ("proc_code='%s'" % lab), which breaks on labs containing quotes and is
    an injection hazard; it now uses the parameterized addWhereEqual /
    addWhereOp forms used elsewhere in this codebase.
    """
    if lab_type == 'panel':
        query = SQLQuery()
        for column in columns:
            query.addSelect(column)
        query.addFrom('stride_order_proc')
        # Parameterized comparisons instead of interpolated WHERE strings
        query.addWhereEqual("proc_code", lab)
        query.addWhereOp("order_time", ">=", "2014-01-01")
        query.addWhereOp("order_time", "<=", "2016-12-31")
        # query.addWhere("order_status = 'Completed'")  # TODO: what about ""
        results = DBUtil.execute(query)
        return results
def query_for_recent6months(newPatientOnly=True, referral_name=None, include_med=False):
    """Build (as a SQL string) the query linking 2016 referral orders to
    specialty encounters occurring within the following 6 months.

    Parameters:
        newPatientOnly -- restrict specialty visits to NEW PATIENT visit types.
        referral_name -- if given, restrict to referrals with that exact description.
        include_med -- additionally select the dispensed medication name from
            the order_med table for the specialty encounter.

    Fix: order_med (o2) was previously added to the FROM clause
    unconditionally, but its join condition to e2 was only added when
    include_med was True — so include_med=False produced an unconstrained
    cross join against the entire order_med table (duplicated rows).
    The o2 table is now only included when it is actually used.
    """
    query = SQLQuery()
    # Referral side (e1/p1/d1): the ordering encounter, its description, ICD codes
    query.addSelect('p1.pat_enc_csn_id_coded AS referral_enc_id')
    query.addSelect('p1.description AS referral_name')
    query.addSelect('e1.appt_when_jittered AS referral_time')
    query.addSelect('e1.jc_uid AS pat_id')
    query.addSelect('d1.icd9 AS referral_icd9')
    query.addSelect('d1.icd10 AS referral_icd10')
    # Specialty side (e2/p2/d2): the follow-up encounter and its department/specialty
    query.addSelect('p2.pat_enc_csn_id_coded AS specialty_enc_id')
    query.addSelect('e2.appt_when_jittered AS specialty_time')
    query.addSelect('d2.department_name AS specialty_dep')
    query.addSelect('d2.specialty AS specialty_name')
    query.addSelect('p2.description AS specialty_order')
    if include_med:
        query.addSelect('o2.amb_med_disp_name as medication')

    query.addFrom('%s AS e1' % ENCOUNTER_TABLE)
    query.addFrom('%s AS p1' % ORDERPROC_TABLE)
    query.addFrom('%s AS d1' % DIAGNOSIS_TABLE)
    query.addFrom('%s AS e2' % ENCOUNTER_TABLE)
    query.addFrom('%s AS p2' % ORDERPROC_TABLE)
    query.addFrom('%s AS d2' % DEPMAP_TABLE)
    if include_med:
        # Only join order_med when the medication column is requested
        query.addFrom('%s AS o2' % ORDERMED_TABLE)

    # Referral orders during 2016, joined to their encounter and diagnoses
    query.addWhere("lower(p1.description) like '%referral%'")
    query.addWhere("p1.pat_enc_csn_id_coded = e1.pat_enc_csn_id_coded")
    query.addWhere("p1.pat_enc_csn_id_coded = d1.pat_enc_csn_id_coded")
    query.addWhere("e1.appt_when_jittered >= '2016-01-01'")
    query.addWhere("e1.appt_when_jittered < '2017-01-01'")
    query.addWhere("e1.jc_uid = e2.jc_uid")
    if referral_name:
        query.addWhere("p1.description = '%s'" % referral_name)

    # Specialty encounter must be a different, later visit within 6 months
    query.addWhere("e1.pat_enc_csn_id_coded != e2.pat_enc_csn_id_coded")
    query.addWhere("e1.appt_when_jittered <= e2.appt_when_jittered")
    query.addWhere("DATE_ADD(date(timestamp(e1.appt_when_jittered)), INTERVAL 6 month) > date(timestamp(e2.appt_when_jittered))")
    if newPatientOnly:
        query.addWhere("e2.visit_type like '%NEW PATIENT%'")
    query.addWhere("e2.department_id = d2.department_id")
    query.addWhere("p2.pat_enc_csn_id_coded = e2.pat_enc_csn_id_coded")
    if include_med:
        query.addWhere("o2.pat_enc_csn_id_coded = e2.pat_enc_csn_id_coded")

    return str(query)
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    """Dump (patient_id, item_date) rows for the given clinical items and
    patients to outputFile as tab-delimited output.
    """
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    itemQuery = SQLQuery()
    for columnName in ["patient_id", "item_date"]:
        itemQuery.addSelect(columnName)
    itemQuery.addFrom("patient_item")
    itemQuery.addWhereIn("clinical_item_id", clinicalItemIds)
    itemQuery.addWhereIn("patient_id", patientById.viewkeys())
    itemQuery.addOrderBy("patient_id")
    itemQuery.addOrderBy("item_date")

    # Stream results straight through the formatter to the output file
    DBUtil.execute(itemQuery, includeColumnNames=True, formatter=formatter)
def _getNonNullBaseNames(self): query = SQLQuery() # SELECT query.addSelect(BASE_NAME) query.addSelect('max_result_flag') # FROM query.addFrom('order_result_stat') # WHERE query.addWhere('max_result_flag is not null') print query print query.getParams() DBUtil.runDBScript(self.SCRIPT_FILE, False) results = DBUtil.execute(str(query), query.getParams()) pd.DataFrame(results, columns=query.select).to_csv('base_names.csv', index=False)