示例#1
0
def queryOutpatientIronRx(outputFile, patientById):
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient
    poIronIngredientMedicationIds = (3065, 3066, 3067, 3071, 3074, 3077, 3986,
                                     7292, 11050, 25006, 26797, 34528, 39676,
                                     78552, 79674, 83568, 84170, 85151, 96118,
                                     112120, 112395, 113213, 126035, 198511,
                                     200455, 201994, 201995, 203679, 207059,
                                     207404, 208037, 208072)
    # Medication IDs directly from prescriptions, formulations that did not map through RxNorm
    poIronDirectMedicationIds = (111354, 540526, 205010, 121171, 111320, 82791,
                                 93962, 201795, 206722, 201068, 116045, 208725,
                                 111341, 206637, 112400, 210256, 77529, 20844,
                                 83798, 205523, 112428, 125474, 111343)
    allEnteralIronMedicationIds = set(poIronIngredientMedicationIds).union(
        poIronDirectMedicationIds)

    formatter = TextResultsFormatter(outputFile)

    colNames = ["pat_id", "ordering_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_med")
    query.addWhereIn("medication_id", allEnteralIronMedicationIds)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("ordering_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
示例#2
0
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect("count(order_med_id_coded) as nOrders")
        query.addSelect("om.med_route, om.medication_id, om.med_description")
        query.addFrom("starr_datalake2018.order_med as om")
        if options.descriptionPrefix:
            query.addWhereOp("om.med_description", "like",
                             options.descriptionPrefix + "%%")
            # Add wildcard to enabe prefix search
        if options.medRoutes:
            query.addWhereIn("om.med_route", options.medRoutes.split(","))
        query.addGroupBy("om.medication_id, om.med_description, om.med_route")
        query.addOrderBy("nOrders desc, om.med_description")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
示例#3
0
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        query.addFrom("clinical_item_category as cic")
        query.addFrom("clinical_item as ci")
        query.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")
        if options.itemPrefix:
            query.addWhereOp("ci.description", "like",
                             options.itemPrefix + "%%")
            # Add wildcard to enabe prefix search
        if options.categoryNames:
            query.addWhereIn("cic.description",
                             options.categoryNames.split(","))
        query.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
示例#4
0
    def _get_random_patient_list(self):
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Get average number of results for this lab test per patient.
        avg_orders_per_patient = self._get_average_orders_per_patient()
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
        # Based on average # of results, figure out how many patients we'd
        # need to get for a feature matrix of requested size.
        self._num_patients = int(numpy.max([self._num_requested_episodes / \
            avg_orders_per_patient, 1]))

        # Get numPatientsToQuery random patients who have gotten test.
        # TODO(sbala): Have option to feed in a seed for the randomness.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addFrom('stride_order_proc AS sop')
        query.addWhereIn('proc_code', [self._lab_panel])
        query.addOrderBy('RANDOM()')
        query.setLimit(self._num_patients)
        log.debug('Querying random patient list...')
        results = DBUtil.execute(query)

        # Get patient list.
        random_patient_list = [ row[0] for row in results ]

        return random_patient_list
示例#5
0
def queryLabResults(outputFile, patientById):
    log.info("Query out lab results, takes a while")
    labBaseNames = \
    (   'ferritin','fe','trfrn','trfsat','ystfrr',
        'wbc','hgb','hct','mcv','rdw','plt',
        'retic','reticab','ldh','hapto','tbil','ibil','dbil',
        'cr','esr','crp'
    )

    formatter = TextResultsFormatter(outputFile)

    # Query rapid when filter by lab result type, limited to X records.
    # Filtering by patient ID drags down substantially until preloaded table by doing a count on the SOR table?
    colNames = [
        "pat_id", "base_name", "common_name", "ord_num_value",
        "reference_unit", "result_flag", "sor.result_time"
    ]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_results as sor, stride_order_proc as sop")
    query.addWhere("sor.order_proc_id = sop.order_proc_id")
    query.addWhereIn("base_name", labBaseNames)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("sor.result_time")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
示例#6
0
    def test_copyPatientTemplate(self):
        # Copy a patient template, including deep copy of notes, orders, states, but only up to relative time zero
        newPatientData = {
            "name": "Template Copy"
        }
        templatePatientId = -1
        self.testPatientId = self.manager.copyPatientTemplate(
            newPatientData, templatePatientId)
        futureTime = 1000000
        # Far future time to test that we still only copied the results up to time zero

        # Verify basic patient information
        patientCols = ["name", "age_years", "gender", "sim_state_id"]
        patientModel = self.manager.loadPatientInfo([self.testPatientId])[0]
        expectedPatientModel = RowItemModel(
            ["Template Copy", 60, "Female", -1], patientCols)
        self.assertEqualDict(expectedPatientModel, patientModel, patientCols)

        # Verify notes
        dataCols = ["sim_patient_id", "content"]
        sampleData = self.manager.loadNotes(self.testPatientId, futureTime)
        verifyData = \
            [   RowItemModel([self.testPatientId,"Initial Note"], dataCols),
                RowItemModel([self.testPatientId,"Initial Note"], dataCols),    # Second copy because another state initiation at time zero and negative onset time
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)

        # Verify orders
        dataCols = [
            "sim_user_id", "sim_patient_id", "sim_state_id",
            "clinical_item_id", "relative_time_start", "relative_time_end"
        ]
        sampleData = self.manager.loadPatientOrders(self.testPatientId,
                                                    futureTime,
                                                    loadActive=None)
        verifyData = \
            [   RowItemModel([-1,self.testPatientId,-1,-15,0,None], dataCols),
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)

        # Verify states
        dataCols = [
            "sim_patient_id", "sim_state_id", "relative_time_start",
            "relative_time_end"
        ]
        query = SQLQuery()
        for dataCol in dataCols:
            query.addSelect(dataCol)
        query.addFrom("sim_patient_state")
        query.addWhereEqual("sim_patient_id", self.testPatientId)
        query.addOrderBy("relative_time_start")
        sampleDataTable = DBUtil.execute(query, includeColumnNames=True)
        sampleData = modelListFromTable(sampleDataTable)
        verifyData = \
            [   RowItemModel([self.testPatientId,-1,-7200,0], dataCols),
                RowItemModel([self.testPatientId,-1,0,None], dataCols),
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)
    def test_buildFeatureMatrix_prePostFeatures(self):
        """
        Test features parameter in addClinicalItemFeatures which allows
        client to specify they only want .pre* or .post* columns in feature
        matrix.
        """
        # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
        # has not been set.
        with self.assertRaises(ValueError):
            self.factory.processPatientEpisodeInput()

        # Initialize DB cursor.
        cursor = self.connection.cursor()

        # Build SQL query for list of patient episodes.
        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
        patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
        patientEpisodeQuery.addSelect("proc_code")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addSelect(
            "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
        )
        patientEpisodeQuery.addFrom("stride_order_proc AS sop")
        patientEpisodeQuery.addFrom("stride_order_results AS sor")
        patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
        patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
        patientEpisodeQuery.addGroupBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        patientEpisodeQuery.addOrderBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        # Set and process patientEpisodeInput.
        self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(
                episode["order_time"])
            resultPatientEpisodes.append(episode)

        # Add TestItem100 and TestItem200 clinical item data.
        self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
        self.factory.addClinicalItemFeatures(["TestItem200"], features="post")
        self.factory.buildFeatureMatrix()
        resultMatrix = self.factory.readFeatureMatrixFile()
        expectedMatrix = FM_TEST_OUTPUT[
            "test_buildFeatureMatrix_prePostFeatures"]

        self.assertEqualList(resultMatrix[2:], expectedMatrix)
    def queryPatientClinicalItemData(self, analysisQuery, conn):
        """Query for all of the order / item data for each patient
        noted in the analysisQuery and yield them one list of clinicalItemIds
        at a time.
        Generated iterator over 2-ples (patientId, clinicalItemIdList)
            - Patient ID: ID of the patient for which the currently yielded item intended for
            - Clinical Item ID List:
                List of all of the clinical items / orders for this patient
                ordered by item date (currently excluding those that are off the "default_recommend" / on the "default exclusion" list).
        """
        sqlQuery = SQLQuery();
        sqlQuery.addSelect("pi.patient_id");
        sqlQuery.addSelect("pi.clinical_item_id");
        #sqlQuery.addSelect("pi.item_date");
        sqlQuery.addFrom("clinical_item_category as cic");
        sqlQuery.addFrom("clinical_item as ci");
        sqlQuery.addFrom("patient_item as pi");
        sqlQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id");
        sqlQuery.addWhere("ci.clinical_item_id = pi.clinical_item_id");

        sqlQuery.addWhereIn("pi.patient_id", analysisQuery.patientIds );

        sqlQuery.addOrderBy("pi.patient_id");
        sqlQuery.addOrderBy("pi.item_date");

        # Execute the actual query for patient order / item data
        cursor = conn.cursor();
        cursor.execute( str(sqlQuery), tuple(sqlQuery.params) );

        currentPatientId = None;
        clinicalItemIdList = list();

        row = cursor.fetchone();
        while row is not None:
            (patientId, clinicalItemId) = row;
            if currentPatientId is None:
                currentPatientId = patientId;

            if patientId != currentPatientId:
                # Changed patient, yield the existing data for the previous patient
                yield (currentPatientId, clinicalItemIdList);
                # Update our data tracking for the current patient
                currentPatientId = patientId;
                clinicalItemIdList = list();

            clinicalItemIdList.append(clinicalItemId);

            row = cursor.fetchone();

        # Yield / return the last patient data
        yield (currentPatientId, clinicalItemIdList);

        cursor.close();
示例#9
0
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    colNames = ["patient_id", "item_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", clinicalItemIds)
    query.addWhereIn("patient_id", patientById.viewkeys())
    query.addOrderBy("patient_id")
    query.addOrderBy("item_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
示例#10
0
    def _getNonNullLabs(self):
        query = SQLQuery()
        # SELECT
        query.addSelect('proc_code')
        # FROM
        query.addFrom('stride_order_proc')
        # WHERE
        query.addWhereLike('proc_code', 'LAB%')
        query.addWhere('abnormal_yn is not null')

        query.addGroupBy('proc_code')
        query.addOrderBy('proc_code')

        results = DBUtil.execute(query)

        df = pd.DataFrame(results, columns=query.select).to_csv(
            DATA_FOLDER + 'proc_codes.csv', index=False)
    def test_addTimeCycleFeatures(self):
        """
        Test .addTimeCycleFeatures()
        """
        # Initialize DB cursor.
        cursor = self.connection.cursor()

        # Build SQL query for list of patient episodes.
        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
        patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
        patientEpisodeQuery.addSelect("proc_code")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addSelect(
            "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
        )
        patientEpisodeQuery.addFrom("stride_order_proc AS sop")
        patientEpisodeQuery.addFrom("stride_order_results AS sor")
        patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
        patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
        patientEpisodeQuery.addGroupBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        patientEpisodeQuery.addOrderBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        # Set and process patientEpisodeInput.
        self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()

        # Add time cycle features.
        self.factory.addTimeCycleFeatures("order_time", "month")
        self.factory.addTimeCycleFeatures("order_time", "hour")

        # Verify output.
        self.factory.buildFeatureMatrix()
        resultMatrix = self.factory.readFeatureMatrixFile()
        expectedMatrix = FM_TEST_OUTPUT["test_addTimeCycleFeatures"][
            "expectedMatrix"]
        self.assertEqualTable(expectedMatrix, resultMatrix[2:], precision=5)

        # Clean up feature matrix.
        try:
            os.remove(self.factory.getMatrixFileName())
        except OSError:
            pass
示例#12
0
	def _getClinicalItemCounts(self):
		query = SQLQuery()
		# SELECT
		query.addSelect(CLINICAL_ITEM_ID)
		query.addSelect('COUNT(' + CLINICAL_ITEM_ID + ') as total')
		# FROM
		query.addFrom('patient_item')
		# OTHER
		query.addGroupBy(CLINICAL_ITEM_ID)
		query.addOrderBy('total', dir='desc')

		print(query)
		print(query.getParams())
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=[CLINICAL_ITEM_ID, 'count']).to_csv('data_summary_stats/item_counts.csv', index=False)
    def _getComponentItemIds(self):
        query = SQLQuery()
        # SELECT
        query.addSelect(CLINICAL_ITEM_ID)
        query.addSelect('name')
        # FROM
        query.addFrom('clinical_item')
        # WHERE
        query.addWhere('clinical_item_category_id = 58')
        query.addOrderBy('name')

        results = DBUtil.execute(query)

        df = pd.DataFrame(results, columns=query.select)
        df['base_name'] = df['name'].str.replace('\([a-z]*\)', '',
                                                 case=False).str.strip()
        df.to_csv(DATA_FOLDER + 'result_ids.csv', index=False)
示例#14
0
	def _getLabs(self):
		query = SQLQuery()
		# SELECT
		query.addSelect(CLINICAL_ITEM_ID)
		query.addSelect('name')
		query.addSelect('description')
		# FROM
		query.addFrom('clinical_item')
		# WHERE
		query.addWhereLike('name','LAB%')
		# OTHER
		query.addOrderBy(CLINICAL_ITEM_ID, dir='asc')

		print(query)
		print(query.getParams())
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('data_summary_stats/labs.csv', index=False)
    def _getPatientsComponentsHistories(self, item_ids):
        query = SQLQuery()
        # SELECT
        query.addSelect('patient_id')
        query.addSelect('name')
        query.addSelect('item_date')
        # FROM
        query.addFrom('clinical_item as ci')
        query.addFrom('patient_item as pi')
        # WHERE
        query.addWhere('ci.clinical_item_id = pi.clinical_item_id')
        query.addWhereIn('ci.clinical_item_id', item_ids)

        query.addOrderBy('patient_id')
        query.addOrderBy('item_date')

        # print query
        # print query.getParams()
        return customDBUtil.execute(query)
示例#16
0
    def loadNotes(self, patientId, currentTime, conn=None):
        """Load notes committed up to the given simulation time.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()

            query.addSelect("sn.sim_note_id")
            query.addSelect("sps.sim_patient_id")
            # Link
            query.addSelect("sn.sim_state_id")
            query.addSelect("sn.note_type_id")
            query.addSelect("sn.author_type_id")
            query.addSelect("sn.service_type_id")
            query.addSelect(
                "(sps.relative_time_start + sn.relative_state_time) as relative_time"
            )
            query.addSelect("sn.content")

            query.addFrom("sim_note as sn")
            query.addFrom("sim_patient_state as sps")

            query.addWhere("sn.sim_state_id = sps.sim_state_id")
            query.addWhereEqual("sps.sim_patient_id", patientId)
            # Only unlock notes once traverse expected time
            query.addWhereOp(
                "(sps.relative_time_start + sn.relative_state_time)", "<=",
                currentTime)
            query.addOrderBy(
                "(sps.relative_time_start + sn.relative_state_time)")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
示例#17
0
    def clinicalItemSearch(self, itemQuery, conn=None):
        """Look for clinical items based on specified query criteria"""
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect("cic.source_table")
            query.addSelect("cic.description as category_description")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                #query.openWhereOrClause()
                for searchField in ("ci.description", ):
                    for searchWord in searchWords:
                        query.addWhereOp(
                            searchField, "~*",
                            "^%(searchWord)s|[^a-z]%(searchWord)s" %
                            {"searchWord": searchWord
                             })  # Prefix search by regular expression
                #query.closeWhereOrClause()
            if itemQuery.sourceTables:
                query.addWhereIn("cic.source_table", itemQuery.sourceTables)
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
                query.addWhere(
                    "ci.item_count <> 0"
                )  # Also ignore items with no occurence in the analyzed data (occurs if item was accepted for analysis from multi-year dataset, but never used in a sub-time frame's analysis)

            if itemQuery.sortField:
                query.addOrderBy(itemQuery.sortField)
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.name")
            query.addOrderBy("ci.description")
            if itemQuery.resultCount is not None:
                query.limit = itemQuery.resultCount
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
示例#18
0
    def action_default(self):
        # Convert query category ID(s) into a list, even of size 1
        categoryIds = self.requestData["clinical_item_category_id"].split(",")

        query = SQLQuery()
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("ci.item_count")
        query.addFrom("clinical_item as ci")
        query.addWhere("analysis_status = 1")
        # Ignore specified items
        query.addWhereIn("ci.clinical_item_category_id", categoryIds)
        query.addOrderBy(self.requestData["orderBy"])

        resultTable = DBUtil.execute(query, includeColumnNames=True)
        resultModels = modelListFromTable(resultTable)

        optionValues = []
        optionTexts = []

        displayFields = ("name", "description", "item_count")

        for resultModel in resultModels:
            optionValues.append(str(resultModel["clinical_item_id"]))

            orderField = self.requestData["orderBy"].split()[0]
            orderValue = resultModel[orderField]
            textValueList = [str(orderValue)]
            for field in displayFields:
                if field != orderField:
                    textValueList.append(str(resultModel[field]))

            textValue = str.join(" - ", textValueList)

            optionTexts.append(textValue)

        # Conveniently, Python string representation coincides with JavaScript
        self.requestData["optionValuesJSON"] = str(optionValues)
        self.requestData["optionTextsJSON"] = str(optionTexts)
示例#19
0
    def _getNumPatientEncountersByMonth(self):
        # Build query.
        # SELECT
        #   CAST(EXTRACT(YEAR FROM noted_date) AS INT) AS admit_year,
        #   CAST(EXTRACT(MONTH FROM noted_date) AS INT) AS admit_month
        #   COUNT(DISTINCT pat_enc_csn_id) AS num_encounters,
        # FROM
        #   stride_dx_list
        # WHERE
        #   data_source = 'ADMIT_DX'
        # GROUP BY
        #   admit_year,
        #   admit_month
        # ORDER BY
        #   admit_year,
        #   admit_month
        query = SQLQuery()
        query.addSelect(
            "CAST(EXTRACT(YEAR FROM noted_date) AS INT) AS admit_year")
        query.addSelect(
            "CAST(EXTRACT(MONTH FROM noted_date) AS INT) AS admit_month")
        query.addSelect("COUNT(DISTINCT pat_enc_csn_id) AS num_encounters")
        query.addFrom("stride_dx_list")
        query.addWhereEqual("data_source", 'ADMIT_DX')
        query.addGroupBy("admit_year")
        query.addGroupBy("admit_month")
        query.addOrderBy("admit_year")
        query.addOrderBy("admit_month")

        # Execute query and return results.
        results = DBUtil.execute(query)
        encountersPerMonth = list()
        for row in results:
            admitYear = row[0]
            admitMonth = row[1]
            numEncounters = row[2]
            encountersPerMonth.append((admitYear, admitMonth, numEncounters))

        return encountersPerMonth
示例#20
0
    def loadPatientOrders(self,
                          patientId,
                          currentTime,
                          loadActive=True,
                          conn=None):
        """Load orders for the given patient that exist by the specified current time point.
        loadActive - Specify whether to load active vs. inactive/completed orders.  Set to None to load both
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("po.sim_patient_order_id")
            query.addSelect("po.sim_user_id")
            query.addSelect("po.sim_patient_id")
            query.addSelect("po.sim_state_id")
            query.addSelect("po.clinical_item_id")
            query.addSelect("po.relative_time_start")
            query.addSelect("po.relative_time_end")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect("cic.source_table")
            query.addSelect("cic.description as category_description")
            query.addFrom("sim_patient_order as po")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            query.addWhere("po.clinical_item_id = ci.clinical_item_id")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            query.addWhereEqual("sim_patient_id", patientId)
            query.addWhereOp("relative_time_start", "<=", currentTime)

            if loadActive:  # Filter out inactive orders here.
                query.openWhereOrClause()
                query.addWhereOp("relative_time_end", ">", currentTime)
                query.addWhere("relative_time_end is null")
                query.closeWhereOrClause()
            #elif loadActive is not None:    # Filter out active orders here.
            #    query.addWhereOp("relative_time_end","<=", currentTime);

            if loadActive:  # Organize currently active orders by category
                query.addOrderBy("cic.description")
                query.addOrderBy("ci.description")
            else:  # Otherwise chronologic order
                query.addOrderBy("relative_time_start")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
示例#21
0
    def loadPendingResultOrders(self, patientId, relativeTime, conn=None):
        """Load all patient orders at the given relativeTime that 
        are due to yield results, but have not yet. 
        Include an estimate of time until results available.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect(
                "distinct po.clinical_item_id"
            )  # Distinct so don't report multiple times for panel orders
            query.addSelect("po.relative_time_start")
            query.addSelect("po.relative_time_end")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect(
                "sorm.turnaround_time"
            )  # Could have different turnaround times for single order if different sub results. Just report each.
            query.addSelect(
                "sorm.turnaround_time - (%d - po.relative_time_start) as time_until_result"
                % relativeTime)  # Calculate time until expect result
            query.addFrom("sim_patient_order as po")
            query.addFrom("clinical_item as ci")
            query.addFrom("sim_order_result_map as sorm")
            query.addWhere("po.clinical_item_id = ci.clinical_item_id")
            query.addWhere("po.clinical_item_id = sorm.clinical_item_id")
            query.addWhereEqual("sim_patient_id", patientId)

            # Only catch orders up to the given relativeTime and not cancelled
            query.addWhereOp("relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("relative_time_end is null")
            query.addWhereOp("relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            # Only PENDING orders, so don't report orders who results should already be available
            query.addWhereOp("sorm.turnaround_time + po.relative_time_start",
                             ">", relativeTime)

            query.addOrderBy("time_until_result")
            query.addOrderBy("relative_time_start")
            query.addOrderBy("ci.name")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
示例#22
0
    def _getAdmitDxPatientFrequencyRankByYear(self):
        # Get list of all clinical item IDs matching admit diagnosis.
        # Get this list in advance to make subsequent query run a bit faster.
        admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

        # Build query for # of unique patients.
        # SELECT
        #   ci.name AS icd_code,
        #   ci.description AS admit_dx,
        #   EXTRACT(YEAR FROM pi.item_date) AS admit_year,
        #   COUNT(DISTINCT pi.patient_id) AS num_unique_patients,
        # FROM
        #   patient_item AS pi
        # JOIN
        #   clinical_item AS ci
        # ON
        #   pi.clinical_item_id = ci.clinical_item_id
        # WHERE
        #   ci.clinical_item_id in (admitDxClinicalItemIds)
        # GROUP BY
        #   icd_code,
        #   admit_dx,
        #   admit_year
        #   num_unique_patients
        # ORDER BY
        #   admit_year,
        #   num_unique_patients DESC
        query = SQLQuery()
        query.addSelect("ci.name AS icd_code")
        query.addSelect("ci.description AS admit_dx")
        query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year")
        query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients")
        query.addFrom("patient_item AS pi")
        query.addJoin("clinical_item AS ci",
                      "pi.clinical_item_id = ci.clinical_item_id")
        query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds)
        query.addGroupBy("icd_code")
        query.addGroupBy("admit_dx")
        query.addGroupBy("admit_year")
        query.addGroupBy("num_unique_patients")
        query.addOrderBy("icd_code")
        query.addOrderBy("admit_year")
        query.addOrderBy("num_unique_patients DESC")

        # Execute query.
        results = DBUtil.execute(query)
示例#23
0
    def _getPatientsLabsHistories(self, proc_codes):
        query = SQLQuery()
        # SELECT
        query.addSelect('pat_id')
        query.addSelect('abnormal_yn')
        query.addSelect('result_time')
        query.addSelect('proc_code')
        # FROM
        query.addFrom('stride_order_proc')
        # query.addFrom('patient_item as pi')
        # WHERE
        query.addWhereEqual('lab_status', 'Final result')
        # query.addWhereEqual('proc_code', proc_code)
        query.addWhereIn('proc_code', proc_codes)

        query.addOrderBy('proc_code')
        query.addOrderBy('pat_id')
        query.addOrderBy('result_time')

        return customDBUtil.execute(query)
示例#24
0
    def test_draw(self):
        # Query events by clinical_item_category.
        # SELECT
        #     pi.patient_id AS sequence_id,
        #     pi.item_date AS event_time,
        #     cic.description AS event_id
        # FROM
        #     patient_item AS pi
        # JOIN
        #     clinical_item AS ci
        # ON
        #     pi.clinical_item_id = ci.clinical_item_id
        # JOIN
        #     clinical_item_category AS cic
        # ON
        #     ci.clinical_item_category_id = cic.clinical_item_category_id
        # ORDER BY
        #     sequence_id,
        #     event_time,
        #     event_id
        query = SQLQuery()
        query.addSelect('pi.patient_id AS sequence_id')
        query.addSelect('pi.item_date AS event_time')
        query.addSelect('cic.description AS event_id')
        query.addFrom('patient_item AS pi')
        query.addJoin('clinical_item AS ci',
                      'pi.clinical_item_id = ci.clinical_item_id')
        query.addJoin(
            'clinical_item_category AS cic',
            'ci.clinical_item_category_id = cic.clinical_item_category_id')
        query.addOrderBy('sequence_id')
        query.addOrderBy('event_time')
        query.addOrderBy('event_id')
        events = DBUtil.execute(query)

        # Build graph based on clinical_item_category.
        categoryDigraph = EventDigraph(events)
        categoryDigraphVizFileName = "test-category-digraph.png"
        categoryDigraph.draw(categoryDigraphVizFileName)
示例#25
0
    def copyPatientTemplate(self, patientData, templatePatientId, conn=None):
        """Create a new patient record based on the given template patient ID to copy from.
        Will copy shallow attributes, overridden by any provided in the given patientData,
        as well as any patient states, notes, or physician orders UP TO (and including)
        relative time zero, but not subsequent states, notes, or physician orders 
        (the latter is expected to reflect real user interaction records).
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            templatePatientData = DBUtil.loadRecordModelById("sim_patient",
                                                             templatePatientId,
                                                             conn=conn)
            del templatePatientData["sim_patient_id"]
            # Remove prior ID to allow for new one
            templatePatientData.update(patientData)
            # Override with new content (if exists)
            DBUtil.insertRow("sim_patient", templatePatientData, conn=conn)
            # Create new patient record
            patientId = DBUtil.execute(DBUtil.identityQuery("sim_patient"),
                                       conn=conn)[0][0]

            # Copy initial template patient states
            query = SQLQuery()
            query.addSelect("*")
            # Copy all columns
            query.addFrom("sim_patient_state as sps")
            query.addWhereEqual("sps.sim_patient_id", templatePatientId)
            query.addWhereOp("relative_time_start", "<=", 0)
            query.addOrderBy("relative_time_start")
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            nStates = len(dataModels)
            for i, dataModel in enumerate(dataModels):
                del dataModel["sim_patient_state_id"]
                # Discard copied ID to allow new one
                if i == nStates - 1:
                    del dataModel["relative_time_end"]
                    # Last state. Blank out end time to reflect open ended for simulation
                dataModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_state", dataModel, conn=conn)

            # Copy initial template orders
            query = SQLQuery()
            query.addSelect("*")
            query.addFrom("sim_patient_order as spo")
            query.addWhereEqual("sim_patient_id", templatePatientId)
            query.addWhereOp("relative_time_start", "<=", 0)
            query.addOrderBy("relative_time_start")
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            for dataModel in dataModels:
                del dataModel["sim_patient_order_id"]
                dataModel["sim_patient_id"] = patientId
                DBUtil.insertRow("sim_patient_order", dataModel, conn=conn)

            conn.commit()
            # Transactional commit for multi-step process
            return patientId
        finally:
            if not extConn:
                conn.close()
示例#26
0
    def orderSetSearch(self, itemQuery, conn=None):
        """Look for clinical items based on specified query criteria"""
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ic.item_collection_id")
            query.addSelect("ic.external_id")
            query.addSelect("ic.name as collection_name")
            query.addSelect("ic.section")
            query.addSelect("ic.subgroup")
            query.addSelect("ci.clinical_item_category_id")
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addFrom("item_collection as ic")
            query.addFrom("item_collection_item as ici")
            query.addFrom("clinical_item as ci")
            query.addWhere("ic.item_collection_id = ici.item_collection_id")
            query.addWhere("ici.clinical_item_id = ci.clinical_item_id")
            query.addWhereNotEqual("ic.section", AD_HOC_SECTION)
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                for searchWord in searchWords:
                    query.addWhereOp(
                        "ic.name", "~*",
                        "^%(searchWord)s|[^a-z]%(searchWord)s" %
                        {"searchWord": searchWord})
                    # Prefix search by regular expression
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
            query.addOrderBy("lower(ic.name)")
            query.addOrderBy("ic.external_id")
            query.addOrderBy("lower(ic.section)")
            query.addOrderBy("lower(ic.subgroup)")
            query.addOrderBy("ci.clinical_item_id")
            query.addOrderBy("ci.name")
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            # Aggregate up into order sets
            orderSetModel = None
            for row in dataModels:
                if orderSetModel is None or row[
                        "external_id"] != orderSetModel["external_id"]:
                    if orderSetModel is not None:
                        # Prior order set exists, yield/return it before preparing next one
                        yield orderSetModel
                    orderSetModel = \
                        {   "external_id": row["external_id"],
                            "name": row["collection_name"],
                            "itemList": list(),
                        }
                orderSetModel["itemList"].append(row)
            yield orderSetModel
            # Yield the last processed model
        finally:
            if not extConn:
                conn.close()
示例#27
0
    def loadResults(self, patientId, relativeTime, conn=None):
        """Load all results active by the given relativeTime.
        Will look for sim_patient_state times and sim_patient_order for diagnostic orders,
        to extrapolate all state-specific results for each order, or using default values
        if no state specific ones available.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # First query for all expected result labels and states, without state-specific values as
            #   may want outer join behvaior against default state values
            query = SQLQuery()

            query.addSelect("distinct spo.sim_state_id")
            query.addSelect("sr.sim_result_id")
            query.addSelect("sr.name")
            query.addSelect("sr.description")
            query.addSelect("sr.priority")
            query.addSelect("sr.group_string")

            query.addSelect("spo.relative_time_start")
            query.addSelect("sorm.turnaround_time")
            query.addSelect(
                "(spo.relative_time_start + sorm.turnaround_time) as result_relative_time"
            )

            query.addFrom("sim_patient_order as spo")
            query.addFrom("sim_order_result_map as sorm")
            query.addFrom("sim_result as sr")

            query.addWhere("spo.clinical_item_id = sorm.clinical_item_id")
            query.addWhere("sorm.sim_result_id = sr.sim_result_id")

            query.addWhereEqual("spo.sim_patient_id", patientId)
            # Only unlock results if appropiate prereq orders were placed in the past (and longer than the turnaround time)
            query.addWhereOp("spo.relative_time_start + sorm.turnaround_time",
                             "<=", relativeTime)

            query.addOrderBy("result_relative_time")
            query.addOrderBy("sr.priority")

            resultTable = DBUtil.execute(query,
                                         includeColumnNames=True,
                                         conn=conn)
            resultModels = modelListFromTable(resultTable)

            # Pass through results to get set of states to search for
            stateIds = set([DEFAULT_STATE_ID])
            # Include default state to fall back on
            for resultModel in resultModels:
                stateIds.add(resultModel["sim_state_id"])

            # Second query for state-specific values
            valueQuery = SQLQuery()
            valueQuery.addSelect("ssr.sim_state_id")
            valueQuery.addSelect("ssr.sim_result_id")
            valueQuery.addSelect("ssr.num_value")
            valueQuery.addSelect("ssr.num_value_noise")
            valueQuery.addSelect("ssr.text_value")
            valueQuery.addSelect("ssr.result_flag")
            valueQuery.addSelect("ssr.clinical_item_id")
            # Output clinical item if result flag means something
            valueQuery.addFrom("sim_state_result as ssr")
            valueQuery.addWhereIn("ssr.sim_state_id", stateIds)
            valueTable = DBUtil.execute(valueQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            valueModels = modelListFromTable(valueTable)

            # Store in-memory dictionary for rapid cross-referencing "join" to result table
            valueModelByStateIdByResultId = dict()
            for valueModel in valueModels:
                resultId = valueModel["sim_result_id"]
                stateId = valueModel["sim_state_id"]
                if resultId not in valueModelByStateIdByResultId:
                    valueModelByStateIdByResultId[resultId] = dict()
                valueModelByStateIdByResultId[resultId][stateId] = valueModel

            # Now go back through original results and join up state-specific values, or use default values if needed
            resultValueModels = list()
            for resultModel in resultModels:
                resultId = resultModel["sim_result_id"]
                stateId = resultModel["sim_state_id"]
                if resultId in valueModelByStateIdByResultId:
                    valueModelByStateId = valueModelByStateIdByResultId[
                        resultId]
                    if stateId in valueModelByStateId:
                        # Have a state-specific value, populate that
                        valueModel = valueModelByStateId[stateId]
                        resultModel.update(valueModel)
                    elif DEFAULT_STATE_ID in valueModelByStateId:
                        # No state-specific value, but have a default one to populate instead
                        valueModel = valueModelByStateId[DEFAULT_STATE_ID]
                        resultModel.update(valueModel)
                    resultValueModels.append(resultModel)
                else:
                    # No result information available, even in default state. Skip these
                    #resultModel["num_value"] = None;
                    #resultModel["num_value_noise"] = None;
                    #resultModel["text_value"] = None;
                    #resultModel["result_flag"] = None;
                    #resultModel["clinical_item_id"] = None;
                    pass

            return resultValueModels
        finally:
            if not extConn:
                conn.close()
示例#28
0
    def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
        """Load basic information about the specified patients.
        Report patient state at given time, or default to time zero
        """
        if relativeTime is None:
            relativeTime = 0
            # Just look for time zero default then

        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("sp.sim_patient_id")
            query.addSelect("sp.name")
            query.addSelect("sp.age_years")
            query.addSelect("sp.gender")
            query.addSelect("s.sim_state_id")
            query.addSelect("s.name as state_name")
            query.addSelect("s.description as state_description")
            query.addSelect("sps.relative_time_start")
            query.addSelect("sps.relative_time_end")
            query.addFrom("sim_patient as sp")
            query.addFrom("sim_patient_state as sps")
            query.addFrom("sim_state as s")
            query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
            query.addWhere("sps.sim_state_id = s.sim_state_id")
            if patientIds is not None:
                query.addWhereIn("sp.sim_patient_id", patientIds)

            # Look for the state that matches the given relative time offset
            query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("sps.relative_time_end is null")
            query.addWhereOp("sps.relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            query.addOrderBy("sp.name")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)

            if len(dataModels) > 0:
                # Secondary query to build lookup table of possible state transition options from patient current states
                subQuery = SQLQuery()
                subQuery.addSelect("pre_state_id")
                subQuery.addSelect("post_state_id")
                subQuery.addSelect("clinical_item_id")
                subQuery.addSelect("time_trigger")
                subQuery.addFrom("sim_state_transition as sst")
                subQuery.addWhereIn(
                    "pre_state_id",
                    columnFromModelList(dataModels, "sim_state_id"))
                subResults = DBUtil.execute(subQuery, conn=conn)

                # For each pre-state, track which clinical items or times trigger which post-states
                postStateIdByItemIdByPreStateId = dict()
                postStateIdTimeTriggerByPreStateId = dict()
                for preStateId, postStateId, itemId, timeTrigger in subResults:
                    if preStateId not in postStateIdByItemIdByPreStateId:
                        postStateIdByItemIdByPreStateId[preStateId] = dict()
                    postStateIdByItemIdByPreStateId[preStateId][
                        itemId] = postStateId

                    if timeTrigger is not None:
                        postStateIdTimeTriggerByPreStateId[preStateId] = (
                            postStateId, timeTrigger)

                # Record in patient result models for retrieval
                for i, dataModel in enumerate(dataModels):
                    patientId = dataModel["sim_patient_id"]
                    stateId = dataModel["sim_state_id"]

                    dataModel["postStateIdByItemId"] = dict()
                    if stateId in postStateIdByItemIdByPreStateId:
                        dataModel[
                            "postStateIdByItemId"] = postStateIdByItemIdByPreStateId[
                                stateId]
                    dataModel["postStateIdTimeTriggerByPreStateId"] = dict()
                    if stateId in postStateIdTimeTriggerByPreStateId:
                        dataModel[
                            "postStateIdTimeTrigger"] = postStateIdTimeTriggerByPreStateId[
                                stateId]

                    if dataModel[
                            "relative_time_end"] is None and "postStateIdTimeTrigger" in dataModel:
                        # Check that we haven't passed (and should thus trigger) a time-based state transition
                        (postStateId,
                         timeTrigger) = dataModel["postStateIdTimeTrigger"]
                        preStateTime = dataModel["relative_time_start"]
                        postStateTriggerTime = (preStateTime + timeTrigger)

                        if postStateTriggerTime <= relativeTime:  # Trigger state transition just by time elapsed
                            #print >> sys.stderr, relativeTime, preStateTime, stateId, postStateTriggerTime, postStateId
                            self.recordStateTransition(patientId,
                                                       stateId,
                                                       postStateId,
                                                       postStateTriggerTime,
                                                       conn=conn)
                            # State change which can yield new triggers, so recursively reload.
                            # Small risk of infinite recusion if timeTriggers are zero. Otherwise, should converge as each recursion will update the preState relativeTimeStart
                            dataModels[i] = self.loadPatientInfo([patientId],
                                                                 relativeTime,
                                                                 conn=conn)[0]

            return dataModels
        finally:
            if not extConn:
                conn.close()
示例#29
0
DATA_QUERY.addSelect("patient_id")
DATA_QUERY.addSelect("clinical_item_category_id")
DATA_QUERY.addSelect('name')
DATA_QUERY.addSelect("description")

# Join
DATA_QUERY.addFrom("patient_item")
DATA_QUERY.addJoin(
    "clinical_item",
    "patient_item.clinical_item_id = clinical_item.clinical_item_id",
    joinType="INNER")
DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description",
                         "Tt Med Univ (Primary)")  # Everyone
#DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Pamf Med (Primary)") # Expert

DATA_QUERY.addOrderBy("patient_id", dir="ASC")

print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)

unique_patient_ids = {}

# output = open("/Users/jwang/Desktop/expert.csv", "w")
# outlist = open("/Users/jwang/Desktop/expert_list.csv", "w")
output = open("/Users/jwang/Desktop/everyone.csv",
              "w")  #includes experts + trainees who are providing patient care
outlist = open("/Users/jwang/Desktop/everyone_list.csv", "w")
示例#30
0
    def _query_patient_episodes(self):
        log.info('Querying patient episodes...')
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Build parameters for query.
        self._lab_components = self._get_components_in_lab_panel()
        random_patient_list = self._get_random_patient_list()

        # Build SQL query for list of patient episodes.
        # Note that for 2008-2014 data, result_flag can take on any of the
        # following values:  High, Low, High Panic, Low Panic,
        # Low Off-Scale, Negative, Positive, Resistant, Susceptible, Abnormal, *
        # (NONE): Only 27 lab components can have this flag. None has this
        #           value for more than 5 results, so ignore it.
        # *: Only 10 lab components can have this flag. Only 6 have it for
        #           >10 tests, and each of them is a microbiology test for which
        #           a flag is less meaningful, e.g. Gram Stain, AFB culture.
        # Susceptible: Only 15 lab components can have this flag. All of those
        #           only have this value for 2 results, so ignore it.
        # Resistant: Only 1 lab component can have this flag. Only two results
        #           have this value, so ignore it.
        # Abnormal: 1462 lab components can have this flag. Many (e.g. UBLOOD)
        #           have this value for thousands of results, so include it.
        # Negative: Only 10 lab components can have this flag, and all for
        #           less than 5 results, so ignore it.
        # Positive: Only 3 lab components can have this flag, and all for
        #           only 1 result, so ignore it.
        # Low Off-Scale: Only 1 lab component can have this flag, and only for
        #           3 results, so ignore it.
        # Low Panic: 1401 lab components can have this flag, many core
        #           metabolic components. Include it.
        # High Panic: 8084 lab components can have this flag, many core
        #           metabolic components. Include it.

        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':

            query = SQLQuery()
            '''
            pat_id: hashed patient id
            '''
            query.addSelect('CAST(pat_id AS BIGINT) as pat_id')
            '''
            order_proc_id: unique identifier for an episode
            '''
            query.addSelect('sop.order_proc_id')
            '''
            self._varTypeInTable: usually proc_code or base_name, the column of the lab to be queried
            '''
            query.addSelect(self._varTypeInTable)
            '''
            order_time: The time of the order. Note that sor table does not have this info. 
            '''
            query.addSelect('order_time')
            '''
            y-labels related columns, choose one to predict (for now, use all_components_normal to predict). 
            '''
            if self._isLabPanel:
                query.addSelect(
                    "CASE WHEN abnormal_yn = 'Y' THEN 1 ELSE 0 END AS abnormal_panel"
                )  #
                query.addSelect(
                    "SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL THEN 1 ELSE 0 END) AS num_components"
                )  # sx
                query.addSelect(
                    "SUM(CASE WHEN result_flag IS NULL THEN 1 ELSE 0 END) AS num_normal_components"
                )  # sx
                query.addSelect(
                    "CAST(SUM(CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal"
                )  # sx
            else:
                query.addSelect(
                    "CASE WHEN result_flag IN ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') THEN 0 ELSE 1 END AS component_normal"
                )
            '''
            Relevant tables. Note that sor table does not have patient_id info; need to join sop to obtain it.  
            '''
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            '''
            Condition: self._time_limit[0] < order_time < self._time_limit[1]
            '''
            if self._time_limit:
                if self._time_limit[0]:
                    query.addWhere("sop.order_time > '%s'" %
                                   self._time_limit[0])
                if self._time_limit[1]:
                    query.addWhere("sop.order_time < '%s'" %
                                   self._time_limit[1])

            query.addWhere(
                "(result_flag in ('High', 'Low', 'High Panic', 'Low Panic', '*', 'Abnormal') OR result_flag IS NULL)"
            )
            query.addWhereIn(self._varTypeInTable, [self._lab_var])  # sx
            query.addWhereIn("pat_id", random_patient_list)

            query.addGroupBy('pat_id')
            query.addGroupBy('sop.order_proc_id')
            query.addGroupBy(self._varTypeInTable)
            query.addGroupBy('order_time')
            if not self._isLabPanel:
                query.addGroupBy('result_flag')
            query.addGroupBy('abnormal_yn')  #

            query.addOrderBy('pat_id')
            query.addOrderBy('sop.order_proc_id')
            query.addOrderBy(self._varTypeInTable)
            query.addOrderBy('order_time')

            query.setLimit(self._num_requested_episodes)

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query, index_time_col='order_time')

        else:
            '''
            Sqlite3 has an interesting limit for the total number of place_holders in a query, 
            and this limit varies across platforms/operating systems (500-99999 on mac, 999 by defaulty). 
            
            To avoid this problem when querying 1000-10000 patient ids, use string queries instead of the
            default (convenient) routine in DBUtil.  
            '''

            query_str = "SELECT CAST(pat_id AS BIGINT) AS pat_id, order_proc_id, %s, order_time, " % self._varTypeInTable
            if not self._isLabPanel:
                query_str += "CASE WHEN result_in_range_yn = 'Y' THEN 1 ELSE 0 END AS component_normal "
            else:
                query_str += "CAST(SUM(CASE WHEN result_in_range_yn != 'Y' THEN 1 ELSE 0 END) = 0 AS INT) AS all_components_normal "
            query_str += "FROM labs "
            query_str += "WHERE %s = '%s' " % (self._varTypeInTable,
                                               self._lab_var)
            query_str += "AND pat_id IN "
            pat_list_str = "("
            for pat_id in random_patient_list:
                pat_list_str += str(pat_id) + ","
            pat_list_str = pat_list_str[:-1] + ") "
            query_str += pat_list_str
            query_str += "GROUP BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "ORDER BY pat_id, order_proc_id, %s, order_time " % self._varTypeInTable
            query_str += "LIMIT %d" % self._num_requested_episodes

            self._num_reported_episodes = FeatureMatrix._query_patient_episodes(
                self, query_str, index_time_col='order_time')