Example #1
    def _getAdmitDateRange(self):
        # Get list of all clinical item IDs matching admit diagnosis.
        # Get this list in advance to make subsequent query run a bit faster.
        admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

        # Build query for earliest and latest admissions.
        # SELECT
        #   MIN(item_date) AS first_admit_date,
        #   MAX(item_date) AS last_admit_date
        # FROM
        #   patient_item
        # WHERE
        #   clinical_item_id in (admitDxClinicalItemIds)
        query = SQLQuery()
        query.addSelect("MIN(item_date) AS first_admit_date")
        query.addSelect("MAX(item_date) AS last_admit_date")
        query.addFrom("patient_item")
        query.addWhereIn("clinical_item_id", admitDxClinicalItemIds)

        # Execute query and return results.
        results = self._executeCachedDBQuery(query)
        firstAdmitDate = DBUtil.parseDateValue(results[0][0]).date()
        lastAdmitDate = DBUtil.parseDateValue(results[0][1]).date()

        return firstAdmitDate, lastAdmitDate
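
Example #1 hides the execution step behind a helper (self._executeCachedDBQuery), but later examples show the general pattern: the builder renders to a SQL string via str(query) and exposes its bound values via query.params (or query.getParams()). Below is a minimal sketch of that pattern, not taken from the repo, assuming conn is an open DB-API connection and that SQLQuery lives at the import path used throughout the surrounding codebase.

# Minimal sketch (not from the repo): shows the str(query) / query.params
# execution pattern that the later examples use.
from medinfo.db.Model import SQLQuery   # assumed import path

query = SQLQuery()
query.addSelect("MIN(item_date) AS first_admit_date")
query.addSelect("MAX(item_date) AS last_admit_date")
query.addFrom("patient_item")
query.addWhereIn("clinical_item_id", [123, 456])    # placeholder admit-diagnosis item IDs

cursor = conn.cursor()    # `conn` assumed to be an open DB-API connection
cursor.execute(str(query), tuple(query.params))     # SQL text and bound values are passed separately
firstAdmitDate, lastAdmitDate = cursor.fetchone()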
Example #2
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect("count(order_med_id_coded) as nOrders")
        query.addSelect("om.med_route, om.medication_id, om.med_description")
        query.addFrom("starr_datalake2018.order_med as om")
        if options.descriptionPrefix:
            query.addWhereOp("om.med_description", "like",
                             options.descriptionPrefix + "%%")
            # Add wildcard to enable prefix search
        if options.medRoutes:
            query.addWhereIn("om.med_route", options.medRoutes.split(","))
        query.addGroupBy("om.medication_id, om.med_description, om.med_route")
        query.addOrderBy("nOrders desc, om.med_description")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
Example #3
    def _get_random_patient_list(self):
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Get average number of results for this lab test per patient.
        avg_orders_per_patient = self._get_average_orders_per_patient()
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
        # Based on average # of results, figure out how many patients we'd
        # need to get for a feature matrix of requested size.
        self._num_patients = int(numpy.max([self._num_requested_episodes / \
            avg_orders_per_patient, 1]))

        # Get numPatientsToQuery random patients who have gotten test.
        # TODO(sbala): Have option to feed in a seed for the randomness.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addFrom('stride_order_proc AS sop')
        query.addWhereIn('proc_code', [self._lab_panel])
        query.addOrderBy('RANDOM()')
        query.setLimit(self._num_patients)
        log.debug('Querying random patient list...')
        results = DBUtil.execute(query)

        # Get patient list.
        random_patient_list = [ row[0] for row in results ]

        return random_patient_list
Example #4
    def _get_components_in_lab_panel(self):
        # Initialize DB connection.
        cursor = self._connection.cursor()

        # Doing a single query results in a sequential scan through
        # stride_order_results. To avoid this, break up the query in two.

        # First, get all the order_proc_ids for proc_code.

        query = SQLQuery()
        query.addSelect('order_proc_id')
        query.addFrom('stride_order_proc')
        query.addWhereIn('proc_code', [self._lab_panel])
        query.addGroupBy('order_proc_id')
        log.debug('Querying order_proc_ids for %s...' % self._lab_panel)
        results = DBUtil.execute(query)
        lab_order_ids = [row[0] for row in results]

        # Second, get all base_names from those orders.
        query = SQLQuery()
        query.addSelect('base_name')
        query.addFrom('stride_order_results')
        query.addWhereIn('order_proc_id', lab_order_ids)
        query.addGroupBy('base_name')
        log.debug('Querying base_names for order_proc_ids...')
        results = DBUtil.execute(query)
        components = [row[0] for row in results]

        return components
Example #5
def queryLabResults(outputFile, patientById):
    log.info("Query out lab results, takes a while")
    labBaseNames = \
    (   'ferritin','fe','trfrn','trfsat','ystfrr',
        'wbc','hgb','hct','mcv','rdw','plt',
        'retic','reticab','ldh','hapto','tbil','ibil','dbil',
        'cr','esr','crp'
    )

    formatter = TextResultsFormatter(outputFile)

    # Query runs quickly when filtering by lab result type, limited to X records.
    # Filtering by patient ID slows things down substantially unless the table is preloaded first (e.g., by doing a count on the SOR table)?
    colNames = [
        "pat_id", "base_name", "common_name", "ord_num_value",
        "reference_unit", "result_flag", "sor.result_time"
    ]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_results as sor, stride_order_proc as sop")
    query.addWhere("sor.order_proc_id = sop.order_proc_id")
    query.addWhereIn("base_name", labBaseNames)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("sor.result_time")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
Example #6
    def loadClinicalItemBaseCountByItemId(self, countPrefix=None, acceptCache=True, conn=None):
        """Helper query to get the baseline analyzed item counts for all of the clinical items
        If countPrefix is provided, can use alternative total item counts instead of the default item_count,
        such as patient_count or encounter_count to match the respective association query baselines used.
        """
        extConn = True;
        if conn is None:
            conn = self.connFactory.connection();
            extConn = False;
        try:
            # First ensure the denormalized count data is updated
            self.updateClinicalItemCounts(acceptCache=acceptCache, conn=conn);

            if countPrefix is None or countPrefix == "":
                countPrefix = "item_";  # Default to general item counts, allowing for repeats per patient

            baseCountQuery = SQLQuery();
            baseCountQuery.addSelect("clinical_item_id");
            baseCountQuery.addSelect("%scount" % countPrefix);
            baseCountQuery.addFrom("clinical_item");
            if acceptCache:
                baseCountResultTable = self.executeCacheOption( baseCountQuery, conn=conn );
            else:
                baseCountResultTable = DBUtil.execute( baseCountQuery, conn=conn );

            baseCountByItemId = dict();
            for (itemId, baseCount) in baseCountResultTable:
                baseCountByItemId[itemId] = baseCount;
            return baseCountByItemId;

        finally:
            if not extConn:
                conn.close();
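
A short usage sketch for the countPrefix options described in the docstring above; dataManager is an assumed name for an instance of the (not shown) class that defines this method, so this is illustrative only.

# Illustrative only: `dataManager` stands in for an instance of the class that
# defines loadClinicalItemBaseCountByItemId.
itemCountById = dataManager.loadClinicalItemBaseCountByItemId()                               # default "item_" -> item_count
patientCountById = dataManager.loadClinicalItemBaseCountByItemId(countPrefix="patient_")      # patient_count baseline
encounterCountById = dataManager.loadClinicalItemBaseCountByItemId(countPrefix="encounter_")  # encounter_count baseline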
Example #7
def queryOutpatientIronRx(outputFile, patientById):
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient
    poIronIngredientMedicationIds = (3065, 3066, 3067, 3071, 3074, 3077, 3986,
                                     7292, 11050, 25006, 26797, 34528, 39676,
                                     78552, 79674, 83568, 84170, 85151, 96118,
                                     112120, 112395, 113213, 126035, 198511,
                                     200455, 201994, 201995, 203679, 207059,
                                     207404, 208037, 208072)
    # Medication IDs directly from prescriptions, formulations that did not map through RxNorm
    poIronDirectMedicationIds = (111354, 540526, 205010, 121171, 111320, 82791,
                                 93962, 201795, 206722, 201068, 116045, 208725,
                                 111341, 206637, 112400, 210256, 77529, 20844,
                                 83798, 205523, 112428, 125474, 111343)
    allEnteralIronMedicationIds = set(poIronIngredientMedicationIds).union(
        poIronDirectMedicationIds)

    formatter = TextResultsFormatter(outputFile)

    colNames = ["pat_id", "ordering_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("stride_order_med")
    query.addWhereIn("medication_id", allEnteralIronMedicationIds)
    query.addWhereIn("pat_id", patientById.viewkeys())
    query.addOrderBy("pat_id")
    query.addOrderBy("ordering_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
Example #8
    def _insertUMichTestRecords(self):
        db_name = medinfo.db.Env.DB_PARAM['DSN']
        db_path = medinfo.db.Env.DB_PARAM['DATAPATH']
        conn = sqlite3.connect(db_path + '/' + db_name)

        table_names = ['labs', 'pt_info', 'demographics', 'encounters', 'diagnoses']

        for table_name in table_names:
            columns = FMTU.FM_TEST_INPUT_TABLES["%s_columns"%table_name]
            column_types = FMTU.FM_TEST_INPUT_TABLES["%s_column_types"%table_name]

            df = pd.DataFrame()
            for one_line in FMTU.FM_TEST_INPUT_TABLES['%s_data'%table_name]:
                df = df.append(dict(zip(columns, one_line)), ignore_index=True)

            df.to_sql(table_name, conn, if_exists="append", index=False)

        # First, write basic (pat_id, order_time) episode information to TempFile
        # Then, all [[pat_id, event_time]] operations are based on these episodes,
        # i.e., all pat_ids come from these episodes

        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS INTEGER) AS pat_id")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addFrom("labs")
        self.cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        self.factory.setPatientEpisodeInput(self.cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
            resultPatientEpisodes.append(episode)
Example #9
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        query.addFrom("clinical_item_category as cic")
        query.addFrom("clinical_item as ci")
        query.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")
        if options.itemPrefix:
            query.addWhereOp("ci.description", "like",
                             options.itemPrefix + "%%")
            # Add wildcard to enable prefix search
        if options.categoryNames:
            query.addWhereIn("cic.description",
                             options.categoryNames.split(","))
        query.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
Example #10
    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        DBUtil.execute \
        (   """delete from patient_item 
            where clinical_item_id in 
            (   select clinical_item_id
                from clinical_item as ci, clinical_item_category as cic
                where ci.clinical_item_category_id = cic.clinical_item_category_id
                and cic.source_table = '%s'
            );
            """ % TEST_SOURCE_TABLE
        )
        DBUtil.execute \
        (   """delete from clinical_item 
            where clinical_item_category_id in 
            (   select clinical_item_category_id 
                from clinical_item_category 
                where source_table = '%s'
            );
            """ % TEST_SOURCE_TABLE
        )
        DBUtil.execute(
            "delete from clinical_item_category where source_table = '%s';" %
            TEST_SOURCE_TABLE)

        query = SQLQuery()
        query.delete = True
        query.addFrom("stride_patient")
        query.addWhere("pat_id < 0")
        DBUtil.execute(query)

        DBTestCase.tearDown(self)
Example #11
    def loadPatientLastEventTime(self, patientId, conn=None):
        """Find the last simulated time where the patient received any user orders (or default to time 0).
        Makes for a natural starting point for resuming a simulation.
        Note: Misses cases where the patient state also changed, triggered not by an order but simply by time lag.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("max(relative_time_start)")
            query.addSelect("max(relative_time_end)")
            query.addSelect(
                "0"
            )  # Simply select 0 as the default time if we don't find any others
            query.addFrom("sim_patient_order as po")
            query.addWhereEqual("sim_patient_id", patientId)

            lastOrderTime = max(
                value for value in DBUtil.execute(query, conn=conn)[0]
                if value is not None)
            return lastOrderTime
        finally:
            if not extConn:
                conn.close()
Example #12
    def querySourceItems(self, convOptions, progress=None, conn=None):
        """Query the database for list of all source clinical items (culture results, etc.)
        and yield the results one at a time.  If startDate provided, only return items whose
        occurrence date is on or after that date.
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();

        # Column headers to query for that map to respective fields in analysis table
        headers = ["order_proc_anon_id","pat_anon_id","pat_enc_csn_anon_id","proc_code","organism_name","antibiotic_name","suseptibility", "shifted_result_time"];

        query = SQLQuery();
        for header in headers:
            query.addSelect( header );
        query.addFrom("stride_culture_micro");
        # TODO: FIGURE OUT WHY CAN'T DO >= OPERATION HERE
        # if convOptions.startDate is not None:
        #     query.addWhereOp("shifted_result_time",">=", convOptions.startDate);
        # if convOptions.endDate is not None:
        #     query.addWhereOp("shifted_result_time","<", convOptions.endDate);  # Still use begin date as common filter value
      
      
        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

        cursor = conn.cursor();
        # Do one massive query, but yield data for one item at a time.
        cursor.execute( str(query), tuple(query.params) );

        row = cursor.fetchone();
        while row is not None:
            rowModel = RowItemModel( row, headers );

            if rowModel['shifted_result_time'] is None: # Don't add if no result time given
                row = cursor.fetchone();
                continue

            if rowModel['organism_name'] is not None: # if positive culture but results uninterpretable, don't add feature
                if rowModel['suseptibility'] is None or rowModel['antibiotic_name'] == 'Method' or rowModel['antibiotic_name'] is None:
                    row = cursor.fetchone();
                    continue

            # So that we don't run into directory issues later when writing temp files
            try:
                rowModel['antibiotic_name'] = rowModel['antibiotic_name'].replace('/', '-') 
            except AttributeError:  # When antibiotic name is None
                pass

            yield rowModel; # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            row = cursor.fetchone();

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close();

        if not extConn:
            conn.close();
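
A sketch of how this generator is typically consumed; converter (an instance of the class above) and convOptions are assumed names, and the printed columns are just a few of the queried headers.

# Illustrative consumption of the generator above; each yielded rowModel is a
# RowItemModel that behaves like a dict keyed by the queried column headers.
prog = ProgressDots()
for rowModel in converter.querySourceItems(convOptions, progress=prog):
    print rowModel["pat_anon_id"], rowModel["organism_name"], rowModel["antibiotic_name"]
    prog.update()
prog.printStatus()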
Example #13
    def generatePatientItemsForCompositeId(self,
                                           clinicalItemIds,
                                           compositeId,
                                           conn=None):
        """Create patient_item records for the composite to match the given clinical item ID patient items.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Record linking information
            for componentId in clinicalItemIds:
                linkModel = RowItemModel()
                linkModel["clinical_item_id"] = compositeId
                linkModel["linked_item_id"] = componentId

                insertQuery = DBUtil.buildInsertQuery("clinical_item_link",
                                                      linkModel.keys())
                insertParams = linkModel.values()
                DBUtil.execute(insertQuery, insertParams, conn=conn)

            # Extract back link information, which will also flatten out any potential inherited links
            linkedItemIdsByBaseId = self.loadLinkedItemIdsByBaseId(conn=conn)
            linkedItemIds = linkedItemIdsByBaseId[compositeId]

            # Create patient_item records for the composite clinical item to overlap existing component ones
            # First query for the existing component records
            query = SQLQuery()
            query.addSelect("*")
            query.addFrom("patient_item")
            query.addWhereIn("clinical_item_id", linkedItemIds)
            results = DBUtil.execute(query, includeColumnNames=True, conn=conn)
            patientItems = modelListFromTable(results)

            # Patch component records to instead become composite item records then insert back into database
            progress = ProgressDots(total=len(patientItems))
            for patientItem in patientItems:
                del patientItem["patient_item_id"]
                patientItem["clinical_item_id"] = compositeId
                patientItem["analyze_date"] = None

                insertQuery = DBUtil.buildInsertQuery("patient_item",
                                                      patientItem.keys())
                insertParams = patientItem.values()

                try:
                    # Optimistic insert of a new unique item
                    DBUtil.execute(insertQuery, insertParams, conn=conn)
                except conn.IntegrityError, err:
                    # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                    log.info(err)
                progress.Update()

            # progress.PrintStatus();
        finally:
            if not extConn:
                conn.close()
Example #14
    def test_copyPatientTemplate(self):
        # Copy a patient template, including deep copy of notes, orders, states, but only up to relative time zero
        newPatientData = {
            "name": "Template Copy"
        }
        templatePatientId = -1
        self.testPatientId = self.manager.copyPatientTemplate(
            newPatientData, templatePatientId)
        futureTime = 1000000
        # Far future time to test that we still only copied the results up to time zero

        # Verify basic patient information
        patientCols = ["name", "age_years", "gender", "sim_state_id"]
        patientModel = self.manager.loadPatientInfo([self.testPatientId])[0]
        expectedPatientModel = RowItemModel(
            ["Template Copy", 60, "Female", -1], patientCols)
        self.assertEqualDict(expectedPatientModel, patientModel, patientCols)

        # Verify notes
        dataCols = ["sim_patient_id", "content"]
        sampleData = self.manager.loadNotes(self.testPatientId, futureTime)
        verifyData = \
            [   RowItemModel([self.testPatientId,"Initial Note"], dataCols),
                RowItemModel([self.testPatientId,"Initial Note"], dataCols),    # Second copy because another state initiation at time zero and negative onset time
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)

        # Verify orders
        dataCols = [
            "sim_user_id", "sim_patient_id", "sim_state_id",
            "clinical_item_id", "relative_time_start", "relative_time_end"
        ]
        sampleData = self.manager.loadPatientOrders(self.testPatientId,
                                                    futureTime,
                                                    loadActive=None)
        verifyData = \
            [   RowItemModel([-1,self.testPatientId,-1,-15,0,None], dataCols),
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)

        # Verify states
        dataCols = [
            "sim_patient_id", "sim_state_id", "relative_time_start",
            "relative_time_end"
        ]
        query = SQLQuery()
        for dataCol in dataCols:
            query.addSelect(dataCol)
        query.addFrom("sim_patient_state")
        query.addWhereEqual("sim_patient_id", self.testPatientId)
        query.addOrderBy("relative_time_start")
        sampleDataTable = DBUtil.execute(query, includeColumnNames=True)
        sampleData = modelListFromTable(sampleDataTable)
        verifyData = \
            [   RowItemModel([self.testPatientId,-1,-7200,0], dataCols),
                RowItemModel([self.testPatientId,-1,0,None], dataCols),
            ]
        self.assertEqualDictList(verifyData, sampleData, dataCols)
Example #15
    def querySourceItems(self, startDate=None, endDate=None, progress=None, conn=None):
        """Query the database for list of all source clinical items (lab results in this case)
        and yield the results one at a time.  If startDate provided, only return items
        whose result_time is on or after that date.
        Only include results records where the result_flag is set to an informative value,
        to focus only on abnormal lab results (including the rest would add a ton of relatively uninformative
        data that would greatly expand data space and subsequent computation time)
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();

        # Column headers to query for that map to respective fields in analysis table
        headers = ["sor.order_proc_id", "pat_id", "pat_enc_csn_id", "order_type", "proc_id", "proc_code", "base_name", "component_name", "common_name", "ord_num_value", "result_flag", "result_in_range_yn", "sor.result_time"];

        query = SQLQuery();
        for header in headers:
            query.addSelect( header );
        query.addFrom("stride_order_proc as sop");
        query.addFrom("%s as sor" % SOURCE_TABLE);
        query.addWhere("sop.order_proc_id = sor.order_proc_id");
        #query.addWhere("result_flag <> '*'");   # Will exclude nulls and the uninformative '*' values for text-based microbiology results
        if startDate is not None:
            query.addWhereOp("sor.result_time",">=", startDate);
        if endDate is not None:
            query.addWhereOp("sor.result_time","<", endDate);

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

        cursor = conn.cursor();
        # Do one massive query, but yield data for one item at a time.
        cursor.execute( str(query), tuple(query.params) );

        row = cursor.fetchone();
        while row is not None:
            rowModel = RowItemModel( row, headers );
            # Normalize qualified labels
            rowModel["order_proc_id"] = rowModel["sor.order_proc_id"];
            rowModel["result_time"] = rowModel["sor.result_time"];

            if rowModel['base_name'] is None:
                row = cursor.fetchone()
                continue

            self.populateResultFlag(rowModel,conn=conn);

            yield rowModel; # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            row = cursor.fetchone();

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close();

        if not extConn:
            conn.close();
Example #16
    def updateClinicalItemCounts(self, acceptCache=False, conn=None):
        """Update the summary item_counts for clinical_items based
        on clinical_item_association summary counts.

        If acceptCache is True, then will first check for existence of an entry "clinicalItemCountsUpdated"
            in the data_cache table.  If it exists, assume we have done this update already, and no need to force the calculations again
        """

        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            if acceptCache:
                isCacheUpdated = (self.getCacheData(
                    "clinicalItemCountsUpdated", conn=conn) is not None)
                if isCacheUpdated:
                    # Item count caches already updated, no need to recalculate them
                    return

            # First reset all counts to zero
            query = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
            params = []
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                query += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
                params.append(self.maxClinicalItemId)
            DBUtil.execute(query, params, conn=conn)

            sqlQuery = SQLQuery()
            sqlQuery.addSelect("clinical_item_id")
            sqlQuery.addSelect("count_0 as item_count")
            sqlQuery.addSelect("patient_count_0 as patient_count")
            sqlQuery.addSelect("encounter_count_0 as encounter_count")
            sqlQuery.addFrom("clinical_item_association as ci")
            sqlQuery.addWhere("clinical_item_id = subsequent_item_id")
            # Look along "diagonal" of matrix for primary summary stats
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                sqlQuery.addWhereOp("clinical_item_id", "<",
                                    self.maxClinicalItemId)

            resultTable = DBUtil.execute(sqlQuery,
                                         includeColumnNames=True,
                                         conn=conn)
            resultModels = modelListFromTable(resultTable)

            for result in resultModels:
                DBUtil.updateRow("clinical_item",
                                 result,
                                 result["clinical_item_id"],
                                 conn=conn)

            # Make a note that this cache data has been updated
            self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
        finally:
            if not extConn:
                conn.close()
Example #17
def _getNumPatients():
    # NOTE: the original snippet was incomplete (an empty addSelect() call and a
    # query variable overwritten by an unfinished SQL string).  The version below
    # is a plausible reconstruction of the apparent intent: count the distinct
    # patients that have an item in the admit-diagnosis category.
    ADMIT_DX_CATEGORY_ID = 2
    query = SQLQuery()
    query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_patients")
    query.addFrom("patient_item AS pi")
    query.addFrom("clinical_item AS ci")
    query.addWhere("pi.clinical_item_id = ci.clinical_item_id")
    query.addWhereEqual("ci.clinical_item_category_id", ADMIT_DX_CATEGORY_ID)
    results = DBUtil.execute(query)
    return results[0][0]
Example #18
    def test_buildFeatureMatrix_prePostFeatures(self):
        """
        Test features parameter in addClinicalItemFeatures which allows
        client to specify they only want .pre* or .post* columns in feature
        matrix.
        """
        # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
        # has not been set.
        with self.assertRaises(ValueError):
            self.factory.processPatientEpisodeInput()

        # Initialize DB cursor.
        cursor = self.connection.cursor()

        # Build SQL query for list of patient episodes.
        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
        patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
        patientEpisodeQuery.addSelect("proc_code")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addSelect(
            "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
        )
        patientEpisodeQuery.addFrom("stride_order_proc AS sop")
        patientEpisodeQuery.addFrom("stride_order_results AS sor")
        patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
        patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
        patientEpisodeQuery.addGroupBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        patientEpisodeQuery.addOrderBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        # Set and process patientEpisodeInput.
        self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(
                episode["order_time"])
            resultPatientEpisodes.append(episode)

        # Add TestItem100 and TestItem200 clinical item data.
        self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
        self.factory.addClinicalItemFeatures(["TestItem200"], features="post")
        self.factory.buildFeatureMatrix()
        resultMatrix = self.factory.readFeatureMatrixFile()
        expectedMatrix = FM_TEST_OUTPUT[
            "test_buildFeatureMatrix_prePostFeatures"]

        self.assertEqualList(resultMatrix[2:], expectedMatrix)
Example #19
    def clinicalItemSearch(self, itemQuery, conn=None):
        """Look for clinical items based on specified query criteria"""
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("ci.clinical_item_id")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect("cic.source_table")
            query.addSelect("cic.description as category_description")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            if itemQuery.searchStr is not None:
                searchWords = itemQuery.searchStr.split()
                #query.openWhereOrClause()
                for searchField in ("ci.description", ):
                    for searchWord in searchWords:
                        query.addWhereOp(
                            searchField, "~*",
                            "^%(searchWord)s|[^a-z]%(searchWord)s" %
                            {"searchWord": searchWord
                             })  # Prefix search by regular expression
                #query.closeWhereOrClause()
            if itemQuery.sourceTables:
                query.addWhereIn("cic.source_table", itemQuery.sourceTables)
            if itemQuery.analysisStatus is not None:
                query.addWhereEqual("ci.analysis_status",
                                    itemQuery.analysisStatus)
                query.addWhere(
                    "ci.item_count <> 0"
                )  # Also ignore items with no occurrence in the analyzed data (occurs if item was accepted for analysis from multi-year dataset, but never used in a sub-time frame's analysis)

            if itemQuery.sortField:
                query.addOrderBy(itemQuery.sortField)
            query.addOrderBy("cic.description")
            query.addOrderBy("ci.name")
            query.addOrderBy("ci.description")
            if itemQuery.resultCount is not None:
                query.limit = itemQuery.resultCount
            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
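
clinicalItemSearch only reads a handful of attributes off itemQuery, so a minimal stand-in object is enough to sketch the expected shape of the argument; the stub class, its values, and the instance name searcher are all illustrative assumptions rather than the repo's real query class.

# Minimal stand-in for the itemQuery argument; only these attributes are read
# by clinicalItemSearch above.  All values here are placeholders.
class _ItemQueryStub(object):
    searchStr = "metabolic panel"   # split into words for the prefix regex filters
    sourceTables = ["order_proc"]   # restricts cic.source_table
    analysisStatus = 1              # also forces ci.item_count <> 0
    sortField = None
    resultCount = 50                # becomes the query limit

dataModels = searcher.clinicalItemSearch(_ItemQueryStub())   # `searcher` assumed instance of the class above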
Example #20
    def querySourceItems(self,
                         userSIDs,
                         limit=None,
                         offset=None,
                         progress=None,
                         conn=None):
        """Query the database for list of all AccessLogs
        and yield the results one at a time.  If userSIDs provided, only return items matching those IDs.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Column headers to query for that map to respective fields in analysis table
        headers = [
            "user_id", "user_name", "de_pat_id", "access_datetime",
            "metric_id", "metric_name", "line_count", "description",
            "metric_group_num", "metric_group_name"
        ]

        query = SQLQuery()
        for header in headers:
            query.addSelect(header)
        query.addFrom(self.sourceTableName)
        if userSIDs is not None:
            query.addWhereIn("user_id", userSIDs)
        query.setLimit(limit)
        query.setOffset(offset)

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(),
                                            conn=conn)[0][0]

        cursor = conn.cursor()
        # Do one massive query, but yield data for one item at a time.
        cursor.execute(str(query), tuple(query.params))

        row = cursor.fetchone()
        while row is not None:
            rowModel = RowItemModel(row, headers)
            yield rowModel
            row = cursor.fetchone()

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close()

        if not extConn:
            conn.close()
Example #21
def loadRespiratoryViralPanelItemIds(extractor):
    # labCategoryId = 6;
    labCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Lab'")[0][0];

    query = SQLQuery();
    query.addSelect("clinical_item_id");
    query.addFrom("clinical_item");
    query.addWhereEqual("analysis_status", 1);
    query.addWhereIn("clinical_item_category_id", (labCategoryId,) );
    query.addWhere("description ~* '%s'" % 'Respiratory.*Panel' );
    respiratoryViralPanelItemIds = set();
    for row in DBUtil.execute(query):
        respiratoryViralPanelItemIds.add(row[0]);
    return respiratoryViralPanelItemIds;
Example #22
    def deactivateAnalysisByCount(self,
                                  thresholdInstanceCount,
                                  categoryIds=None,
                                  conn=None):
        """Find clinical items to deactivate, based on their instance (patient_item) counts
        being too low to be interesting.  Can be restricted to apply only to items under certain categories.

        Use data/analysis/queryItemCounts.py to help guide selections with queries like:

            select count(clinical_item_id), sum(item_count)
            from clinical_item
            where item_count > %s
            and clinical_item_category_id in (%s)

            (and analysis_status = 1)?  Seems like a good filter, but the process itself will change this count.

        The direct search option below is usually for pre-processing before activations even start;
        the former query is meant to count records that have already gone through analysis.

            select clinical_item_id, count(distinct patient_id), count(distinct encounter_id), count(patient_item_id)
            from patient_item
            group by clinical_item_id

        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Make sure clinical item instance (patient item) counts are up to date
            self.updateClinicalItemCounts(conn=conn)
            query = SQLQuery()
            query.addSelect("clinical_item_id")
            query.addFrom("clinical_item")
            if self.maxClinicalItemId is not None:  # Restrict to limited / test data
                query.addWhereOp("clinical_item_id", "<",
                                 self.maxClinicalItemId)
            if categoryIds is not None:
                query.addWhereIn("clinical_item_category_id", categoryIds)
            query.addWhereOp("item_count", "<=", thresholdInstanceCount)
            results = DBUtil.execute(query, conn=conn)

            clinicalItemIds = set()
            for row in results:
                clinicalItemIds.add(row[0])

            self.deactivateAnalysis(clinicalItemIds, conn=conn)
        finally:
            if not extConn:
                conn.close()
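
A hypothetical call sketch for the method above; the instance name, threshold, and category IDs are placeholder values chosen only to illustrate the parameters discussed in the docstring.

# Hypothetical usage: deactivate analysis for items with too few patient_item
# instances, restricted to a couple of (placeholder) category IDs.
dataManager.deactivateAnalysisByCount(thresholdInstanceCount=10, categoryIds=(13, 16))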
Example #23
	def _getClinicalItems(self):
		query = SQLQuery()
		# SELECT
		query.addSelect(CLINICAL_ITEM_ID)
		query.addSelect('name')
		# FROM
		query.addFrom('clinical_item')

		print query
		print query.getParams()
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
Example #24
def queryDemographics(patientDF, baseDate):
    log.info("Populate demographics background for %d patients" % len(patientDF) );
    
    query = SQLQuery();
    query.addSelect("pat_id");
    query.addSelect("%d-birth_year as age" % baseDate.year );
    query.addSelect("gender");
    query.addSelect("primary_race");
    query.addFrom("stride_patient");
    query.addWhereIn("pat_id", patientDF["patientId"] );
    
    results = DBUtil.execute(query);
    cols = ["patientId","age","gender","race"];
    newDF = pd.DataFrame(results,columns=cols);
    return patientDF.merge(newDF, how="left");
Example #25
    def _getNonNullComponents(self):
        query = SQLQuery()
        # SELECT
        query.addSelect(BASE_NAME)
        # query.addSelect('max_result_flag')
        # FROM
        query.addFrom('order_result_stat')
        # WHERE
        query.addWhere('max_result_flag is not null')

        results = DBUtil.execute(query)

        pd.DataFrame(results, columns=query.select).to_csv(DATA_FOLDER +
                                                           'base_names.csv',
                                                           index=False)
Example #26
def loadBloodCultureItemIds(extractor):
    # microCategoryId = 15;
    microCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Microbiology'")[0][0];

    # Look for diagnostic tests indicating suspected infection / sepsis
    query = SQLQuery();
    query.addSelect("clinical_item_id");
    query.addFrom("clinical_item");
    query.addWhereEqual("analysis_status", 1);
    query.addWhereIn("clinical_item_category_id", (microCategoryId,) );
    query.addWhere("description ~* '%s'" % 'Blood Culture' );
    bloodCultureItemIds = set();
    for row in DBUtil.execute(query):
        bloodCultureItemIds.add(row[0]);
    return bloodCultureItemIds;
Example #27
def get_cnt(lab, lab_type, columns):

    if lab_type == 'panel':
        query = SQLQuery()
        for column in columns:
            query.addSelect(column)

        query.addFrom('stride_order_proc')
        query.addWhere("proc_code='%s'" % lab)
        query.addWhere("order_time >= '%s-01-01'" % str(2014))
        query.addWhere("order_time <= '%s-12-31'" % str(2016))
        # query.addWhere("order_status = 'Completed'") # TODO: what about ""

    results = DBUtil.execute(query)
    return results
Example #28
File: queries.py Project: xxxx3/CDSS
def query_for_recent6months(newPatientOnly=True, referral_name=None, include_med=False):
    query = SQLQuery()
    query.addSelect('p1.pat_enc_csn_id_coded AS referral_enc_id')
    query.addSelect('p1.description AS referral_name')
    query.addSelect('e1.appt_when_jittered AS referral_time')
    query.addSelect('e1.jc_uid AS pat_id')
    query.addSelect('d1.icd9 AS referral_icd9')
    query.addSelect('d1.icd10 AS referral_icd10')

    query.addSelect('p2.pat_enc_csn_id_coded AS specialty_enc_id')
    query.addSelect('e2.appt_when_jittered AS specialty_time')
    query.addSelect('d2.department_name AS specialty_dep')
    query.addSelect('d2.specialty AS specialty_name')
    query.addSelect('p2.description AS specialty_order')
    if include_med:
        query.addSelect('o2.amb_med_disp_name as medication')

    query.addFrom('%s AS e1' % ENCOUNTER_TABLE)
    query.addFrom('%s AS p1' % ORDERPROC_TABLE)
    query.addFrom('%s AS d1' % DIAGNOSIS_TABLE)

    query.addFrom('%s AS e2' % ENCOUNTER_TABLE)
    query.addFrom('%s AS p2' % ORDERPROC_TABLE)
    query.addFrom('%s AS d2' % DEPMAP_TABLE)

    query.addFrom('%s AS o2' % ORDERMED_TABLE)

    query.addWhere("lower(p1.description) like '%referral%'")
    query.addWhere("p1.pat_enc_csn_id_coded = e1.pat_enc_csn_id_coded")
    query.addWhere("p1.pat_enc_csn_id_coded = d1.pat_enc_csn_id_coded")
    query.addWhere("e1.appt_when_jittered >= '2016-01-01'")
    query.addWhere("e1.appt_when_jittered < '2017-01-01'")

    query.addWhere("e1.jc_uid = e2.jc_uid")
    if referral_name:
        query.addWhere("p1.description = '%s'" % referral_name)
    query.addWhere("e1.pat_enc_csn_id_coded != e2.pat_enc_csn_id_coded")
    query.addWhere("e1.appt_when_jittered <= e2.appt_when_jittered")
    query.addWhere("DATE_ADD(date(timestamp(e1.appt_when_jittered)), INTERVAL 6 month) > date(timestamp(e2.appt_when_jittered))")

    if newPatientOnly:
        query.addWhere("e2.visit_type like '%NEW PATIENT%'")
    query.addWhere("e2.department_id = d2.department_id")
    query.addWhere("p2.pat_enc_csn_id_coded = e2.pat_enc_csn_id_coded")

    if include_med:
        query.addWhere("o2.pat_enc_csn_id_coded = e2.pat_enc_csn_id_coded")
    return str(query)
Example #29
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    colNames = ["patient_id", "item_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", clinicalItemIds)
    query.addWhereIn("patient_id", patientById.viewkeys())
    query.addOrderBy("patient_id")
    query.addOrderBy("item_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
Example #30
	def _getNonNullBaseNames(self):
		query = SQLQuery()
		# SELECT
		query.addSelect(BASE_NAME)
		query.addSelect('max_result_flag')
		# FROM
		query.addFrom('order_result_stat')
		# WHERE
		query.addWhere('max_result_flag is not null')

		print query
		print query.getParams()
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('base_names.csv', index=False)