示例#1
0
def queryLabResults(outputFile, patientById):
    """Send lab results for the given patients to outputFile.

    Joins stride_order_results to stride_order_proc, keeps rows whose
    base_name is in a fixed list of lab names and whose pat_id is one of
    the keys of patientById, and orders them by patient and result time.
    The query is executed with column names included and the output is
    handed to a TextResultsFormatter wrapped around outputFile.

    Args:
        outputFile: Destination passed to TextResultsFormatter.
        patientById: Dictionary whose keys are the patient IDs to include.
    """
    log.info("Query out lab results, takes a while")

    # Lab base names to extract; this is the IN-list for base_name.
    wantedBaseNames = (
        'ferritin', 'fe', 'trfrn', 'trfsat', 'ystfrr',
        'wbc', 'hgb', 'hct', 'mcv', 'rdw', 'plt',
        'retic', 'reticab', 'ldh', 'hapto', 'tbil', 'ibil', 'dbil',
        'cr', 'esr', 'crp',
    )
    resultsFormatter = TextResultsFormatter(outputFile)

    # Per the original author's note: filtering by lab type is fast, while
    # the patient ID filter slows the query down considerably.
    labQuery = SQLQuery()
    for selectColumn in ("pat_id", "base_name", "common_name",
                         "ord_num_value", "reference_unit", "result_flag",
                         "sor.result_time"):
        labQuery.addSelect(selectColumn)
    labQuery.addFrom("stride_order_results as sor, stride_order_proc as sop")
    labQuery.addWhere("sor.order_proc_id = sop.order_proc_id")
    labQuery.addWhereIn("base_name", wantedBaseNames)
    labQuery.addWhereIn("pat_id", patientById.viewkeys())
    for sortColumn in ("pat_id", "sor.result_time"):
        labQuery.addOrderBy(sortColumn)

    DBUtil.execute(labQuery,
                   includeColumnNames=True,
                   formatter=resultsFormatter)
示例#2
0
    def _getAdmitDateRange(self):
        """Return the earliest and latest admit-diagnosis item dates.

        Runs, through self._executeCachedDBQuery, the equivalent of:

            SELECT MIN(item_date) AS first_admit_date,
                   MAX(item_date) AS last_admit_date
            FROM patient_item
            WHERE clinical_item_id IN (<admit diagnosis item ids>)

        Returns:
            Tuple (firstAdmitDate, lastAdmitDate), each being the ``.date()``
            of the value parsed by DBUtil.parseDateValue.
        """
        # Resolve the admit diagnosis item IDs first so the main query only
        # needs a plain IN-list.
        admitItemIds = self._getAdmitDxClinicalItemIds()

        rangeQuery = SQLQuery()
        for aggregate in ("MIN(item_date) AS first_admit_date",
                          "MAX(item_date) AS last_admit_date"):
            rangeQuery.addSelect(aggregate)
        rangeQuery.addFrom("patient_item")
        rangeQuery.addWhereIn("clinical_item_id", admitItemIds)

        # Only the first result row is read: column 0 is the minimum date,
        # column 1 the maximum.
        firstRow = self._executeCachedDBQuery(rangeQuery)[0]
        earliest = DBUtil.parseDateValue(firstRow[0]).date()
        latest = DBUtil.parseDateValue(firstRow[1]).date()
        return earliest, latest
示例#3
0
文件: extractData.py 项目: xxxx3/CDSS
def queryPatients(period, locations, rxCount):
    """Select patients with more than 2 qualifying medication orders.

    Counts order_med_id per patient across stride_mapped_meds,
    stride_order_med and stride_patient, restricted to the given patient
    locations and to ordering_datetime strictly between period[0] and
    period[-1], keeping only patients whose count exceeds 2.

    Args:
        period: Pair (start, end); also interpolated into the log message.
        locations: Values accepted for patient_location.
        rxCount: Not used by this function.

    Returns:
        pandas DataFrame with columns patientId, nOpioidRx and periodStart
        (the latter set to period[0] on every row).
    """
    log.info(
        "Select patients fitting criteria in designated time period: (%s,%s)" %
        period)

    patientQuery = SQLQuery()
    patientQuery.addSelect("med.pat_id")
    patientQuery.addSelect("count(order_med_id)")
    for table in ("stride_mapped_meds as map",
                  "stride_order_med as med",
                  "stride_patient as pat"):
        patientQuery.addFrom(table)
    for condition in ("analysis_status = 1",
                      "map.medication_id = med.medication_id",
                      "med.pat_id = pat.pat_id",
                      "possible_oncology = 0"):
        patientQuery.addWhere(condition)
    patientQuery.addWhereIn("patient_location", locations)
    patientQuery.addWhereOp("ordering_datetime", ">", period[0])
    patientQuery.addWhereOp("ordering_datetime", "<", period[-1])
    patientQuery.addGroupBy("med.pat_id")
    patientQuery.addHaving("count(order_med_id) >2")

    rows = DBUtil.execute(patientQuery)
    patientDF = pd.DataFrame(rows, columns=["patientId", "nOpioidRx"])

    # Tag every record with the start of the period it was selected from.
    patientDF["periodStart"] = period[0]
    return patientDF
示例#4
0
    def queryItems(self, options, outputFile):
        """Write clinical items matching the options to outputFile.

        Joins clinical_item_category to clinical_item, optionally filtered by
        an item description prefix (options.itemPrefix) and by a
        comma-separated list of category descriptions
        (options.categoryNames).  Each result row, starting with the column
        names, is passed to a TextResultsFormatter wrapped around
        outputFile, sleeping options.pauseSeconds after each row.
        """
        delaySeconds = float(options.pauseSeconds)

        itemQuery = SQLQuery()
        itemQuery.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        for table in ("clinical_item_category as cic", "clinical_item as ci"):
            itemQuery.addFrom(table)
        itemQuery.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")
        if options.itemPrefix:
            # Trailing wildcard turns the LIKE into a prefix search.
            prefixPattern = options.itemPrefix + "%%"
            itemQuery.addWhereOp("ci.description", "like", prefixPattern)
        if options.categoryNames:
            categoryList = options.categoryNames.split(",")
            itemQuery.addWhereIn("cic.description", categoryList)
        itemQuery.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        writer = TextResultsFormatter(outputFile)
        dots = ProgressDots()
        resultRows = DBUtil.execute(itemQuery,
                                    includeColumnNames=True,
                                    connFactory=self.connFactory)
        for resultRow in resultRows:
            writer.formatTuple(resultRow)
            time.sleep(delaySeconds)
            dots.update()
        dots.printStatus()
示例#5
0
    def queryItems(self, options, outputFile):
        """Write medication order counts matching the options to outputFile.

        Counts order_med_id_coded in starr_datalake2018.order_med grouped by
        medication ID, description and route, optionally filtered by a
        description prefix (options.descriptionPrefix) and by a
        comma-separated list of routes (options.medRoutes).  Rows are ordered
        by descending count, then description.  Each result row, starting
        with the column names, is passed to a TextResultsFormatter wrapped
        around outputFile, sleeping options.pauseSeconds after each row.
        """
        delaySeconds = float(options.pauseSeconds)

        medQuery = SQLQuery()
        for selectExpr in ("count(order_med_id_coded) as nOrders",
                           "om.med_route, om.medication_id, om.med_description"):
            medQuery.addSelect(selectExpr)
        medQuery.addFrom("starr_datalake2018.order_med as om")
        if options.descriptionPrefix:
            # Trailing wildcard turns the LIKE into a prefix search.
            prefixPattern = options.descriptionPrefix + "%%"
            medQuery.addWhereOp("om.med_description", "like", prefixPattern)
        if options.medRoutes:
            routeList = options.medRoutes.split(",")
            medQuery.addWhereIn("om.med_route", routeList)
        medQuery.addGroupBy("om.medication_id, om.med_description, om.med_route")
        medQuery.addOrderBy("nOrders desc, om.med_description")

        writer = TextResultsFormatter(outputFile)
        dots = ProgressDots()
        resultRows = DBUtil.execute(medQuery,
                                    includeColumnNames=True,
                                    connFactory=self.connFactory)
        for resultRow in resultRows:
            writer.formatTuple(resultRow)
            time.sleep(delaySeconds)
            dots.update()
        dots.printStatus()
示例#6
0
    def _get_components_in_lab_panel(self):
        """Return the distinct base_names resulted under self._lab_panel.

        The lookup is split into two queries because, per the original
        author's note, a single joined query results in a sequential scan
        through stride_order_results:

        1. collect the order_proc_ids in stride_order_proc whose proc_code
           equals self._lab_panel;
        2. collect the distinct base_name values in stride_order_results
           for those order_proc_ids.

        Returns:
            List of base_name values; empty when the panel has no orders.
        """
        # First, get all the order_proc_ids for proc_code.
        order_query = SQLQuery()
        order_query.addSelect('order_proc_id')
        order_query.addFrom('stride_order_proc')
        order_query.addWhereIn('proc_code', [self._lab_panel])
        order_query.addGroupBy('order_proc_id')
        log.debug('Querying order_proc_ids for %s...' % self._lab_panel)
        lab_order_ids = [row[0] for row in DBUtil.execute(order_query)]

        # Without any orders there can be no components; skip the second
        # query rather than issuing one with an empty IN-list.
        if not lab_order_ids:
            return []

        # Second, get all base_names from those orders.
        component_query = SQLQuery()
        component_query.addSelect('base_name')
        component_query.addFrom('stride_order_results')
        component_query.addWhereIn('order_proc_id', lab_order_ids)
        component_query.addGroupBy('base_name')
        log.debug('Querying base_names for order_proc_ids...')
        return [row[0] for row in DBUtil.execute(component_query)]
示例#7
0
def queryOutpatientIronRx(outputFile, patientById):
    """Send iron prescription order dates for the given patients to outputFile.

    Selects pat_id and ordering_date from stride_order_med for a fixed set
    of medication IDs and for the patient IDs that are keys of patientById,
    ordered by patient then ordering date.  The query is executed with
    column names included and the output is handed to a
    TextResultsFormatter wrapped around outputFile.

    Args:
        outputFile: Destination passed to TextResultsFormatter.
        patientById: Dictionary whose keys are the patient IDs to include.
    """
    log.info("Query outpatient Iron prescriptions")

    # Medication IDs derived by mapping through Iron as an ingredient.
    ingredientMappedIds = (
        3065, 3066, 3067, 3071, 3074, 3077, 3986, 7292, 11050, 25006, 26797,
        34528, 39676, 78552, 79674, 83568, 84170, 85151, 96118, 112120,
        112395, 113213, 126035, 198511, 200455, 201994, 201995, 203679,
        207059, 207404, 208037, 208072,
    )
    # Medication IDs taken directly from prescriptions: formulations that
    # did not map through RxNorm.
    directPrescriptionIds = (
        111354, 540526, 205010, 121171, 111320, 82791, 93962, 201795, 206722,
        201068, 116045, 208725, 111341, 206637, 112400, 210256, 77529, 20844,
        83798, 205523, 112428, 125474, 111343,
    )
    ironMedicationIds = set(ingredientMappedIds).union(directPrescriptionIds)

    resultsFormatter = TextResultsFormatter(outputFile)

    rxQuery = SQLQuery()
    rxQuery.addSelect("pat_id")
    rxQuery.addSelect("ordering_date")
    rxQuery.addFrom("stride_order_med")
    rxQuery.addWhereIn("medication_id", ironMedicationIds)
    rxQuery.addWhereIn("pat_id", patientById.viewkeys())
    rxQuery.addOrderBy("pat_id")
    rxQuery.addOrderBy("ordering_date")

    DBUtil.execute(rxQuery,
                   includeColumnNames=True,
                   formatter=resultsFormatter)
示例#8
0
    def _get_random_patient_list(self):
        """Return a randomly ordered list of pat_ids having self._lab_panel orders.

        The number of patients to fetch is derived from the requested number
        of episodes divided by the per-patient order figure returned by
        self._get_average_orders_per_patient(), with a floor of 1.  The
        result is stored on self._num_patients and used as the query LIMIT.

        Returns:
            List of pat_id values taken from stride_order_proc rows.
            NOTE(review): the query selects pat_id per order row without
            DISTINCT, so the same patient may appear more than once -
            confirm whether callers de-duplicate.
        """
        # Typical number of results for this lab test per patient.
        avg_orders_per_patient = self._get_average_orders_per_patient()
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

        # Based on that figure, work out how many patients are needed for a
        # feature matrix of the requested size (always at least one).
        self._num_patients = int(numpy.max([self._num_requested_episodes / \
            avg_orders_per_patient, 1]))

        # Pick that many random rows among orders of this lab panel.
        # TODO(sbala): Have option to feed in a seed for the randomness.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addFrom('stride_order_proc AS sop')
        query.addWhereIn('proc_code', [self._lab_panel])
        query.addOrderBy('RANDOM()')
        query.setLimit(self._num_patients)
        log.debug('Querying random patient list...')
        results = DBUtil.execute(query)

        return [row[0] for row in results]
示例#9
0
    def generatePatientItemsForCompositeId(self,
                                           clinicalItemIds,
                                           compositeId,
                                           conn=None):
        """Create patient_item records for a composite clinical item.

        First records a clinical_item_link row from compositeId to each of
        clinicalItemIds.  Then reloads the link map, looks up every
        patient_item belonging to the items linked under compositeId, and
        re-inserts a copy of each one with clinical_item_id replaced by
        compositeId and analyze_date reset to None.

        Args:
            clinicalItemIds: Component clinical item IDs to link under the
                composite.
            compositeId: Clinical item ID of the composite item.
            conn: Optional open DB connection.  When omitted, a connection
                is obtained from self.connFactory and closed before
                returning.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Record linking information
            for componentId in clinicalItemIds:
                linkModel = RowItemModel()
                linkModel["clinical_item_id"] = compositeId
                linkModel["linked_item_id"] = componentId

                insertQuery = DBUtil.buildInsertQuery("clinical_item_link",
                                                      linkModel.keys())
                insertParams = linkModel.values()
                DBUtil.execute(insertQuery, insertParams, conn=conn)

            # Extract back link information, which will also flatten out any
            # potential inherited links
            linkedItemIdsByBaseId = self.loadLinkedItemIdsByBaseId(conn=conn)
            linkedItemIds = linkedItemIdsByBaseId[compositeId]

            # Create patient_item records for the composite clinical item to
            # overlap existing component ones.
            # First query for the existing component records
            query = SQLQuery()
            query.addSelect("*")
            query.addFrom("patient_item")
            query.addWhereIn("clinical_item_id", linkedItemIds)
            results = DBUtil.execute(query, includeColumnNames=True, conn=conn)
            patientItems = modelListFromTable(results)

            # Patch component records to instead become composite item
            # records, then insert them back into the database
            progress = ProgressDots(total=len(patientItems))
            for patientItem in patientItems:
                # Drop the primary key so the insert creates a new record.
                del patientItem["patient_item_id"]
                patientItem["clinical_item_id"] = compositeId
                patientItem["analyze_date"] = None

                insertQuery = DBUtil.buildInsertQuery("patient_item",
                                                      patientItem.keys())
                insertParams = patientItem.values()

                try:
                    # Optimistic insert of a new unique item
                    DBUtil.execute(insertQuery, insertParams, conn=conn)
                except conn.IntegrityError as err:
                    # If it turns out to be a duplicate, okay: just note it
                    # and continue to insert whatever else is possible
                    log.info(err)
                progress.Update()

            # progress.PrintStatus();
        finally:
            # Only close connections this method opened itself.
            if not extConn:
                conn.close()
    def queryPatientClinicalItemData(self, analysisQuery, conn):
        """Yield each patient's clinical item IDs, one patient at a time.

        Queries patient_item (joined to clinical_item and
        clinical_item_category) for the patients in
        analysisQuery.patientIds, ordered by patient ID then item date, and
        groups consecutive rows of the same patient.

        Args:
            analysisQuery: Object whose patientIds attribute lists the
                patients to query.
            conn: Open DB connection; it is not closed by this method.

        Yields:
            2-tuples (patientId, clinicalItemIdList) where the list holds
            that patient's clinical item IDs in item-date order.
            NOTE(review): when the query returns no rows, a single
            (None, []) pair is still yielded - confirm callers expect this.
        """
        sqlQuery = SQLQuery()
        sqlQuery.addSelect("pi.patient_id")
        sqlQuery.addSelect("pi.clinical_item_id")
        #sqlQuery.addSelect("pi.item_date");
        sqlQuery.addFrom("clinical_item_category as cic")
        sqlQuery.addFrom("clinical_item as ci")
        sqlQuery.addFrom("patient_item as pi")
        sqlQuery.addWhere("cic.clinical_item_category_id = ci.clinical_item_category_id")
        sqlQuery.addWhere("ci.clinical_item_id = pi.clinical_item_id")

        sqlQuery.addWhereIn("pi.patient_id", analysisQuery.patientIds)

        sqlQuery.addOrderBy("pi.patient_id")
        sqlQuery.addOrderBy("pi.item_date")

        # Execute the actual query for patient order / item data.
        # try/finally guarantees the cursor is released even when the query
        # fails or the consumer stops iterating early (generator close).
        cursor = conn.cursor()
        try:
            cursor.execute(str(sqlQuery), tuple(sqlQuery.params))

            currentPatientId = None
            clinicalItemIdList = list()

            row = cursor.fetchone()
            while row is not None:
                (patientId, clinicalItemId) = row
                if currentPatientId is None:
                    currentPatientId = patientId

                if patientId != currentPatientId:
                    # Changed patient, yield the existing data for the
                    # previous patient
                    yield (currentPatientId, clinicalItemIdList)
                    # Update our data tracking for the current patient
                    currentPatientId = patientId
                    clinicalItemIdList = list()

                clinicalItemIdList.append(clinicalItemId)

                row = cursor.fetchone()

            # Yield / return the last patient data
            yield (currentPatientId, clinicalItemIdList)
        finally:
            cursor.close()
示例#11
0
    def clinicalItemSearch(self, itemQuery, conn=None):
        """Search clinical items using the criteria carried by itemQuery.

        Criteria read from itemQuery:
            searchStr: whitespace-separated words; every word must match
                ci.description (case-insensitive regex, at the start of the
                text or after a non-letter character).
            sourceTables: restricts cic.source_table when non-empty.
            analysisStatus: when not None, restricts ci.analysis_status and
                additionally drops items whose item_count is 0.
            sortField: optional leading sort column.
            resultCount: optional row limit.

        Args:
            itemQuery: Object carrying the attributes above.
            conn: Optional open DB connection.  When omitted, one is taken
                from self.connFactory and closed before returning.

        Returns:
            Result of modelListFromTable applied to the query output
            (executed with column names included).
        """
        ownsConnection = conn is None
        if ownsConnection:
            conn = self.connFactory.connection()
        try:
            search = SQLQuery()
            for selectExpr in ("ci.clinical_item_id",
                               "ci.name",
                               "ci.description",
                               "cic.source_table",
                               "cic.description as category_description"):
                search.addSelect(selectExpr)
            search.addFrom("clinical_item as ci")
            search.addFrom("clinical_item_category as cic")
            search.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")

            if itemQuery.searchStr is not None:
                # Prefix search by regular expression, one condition per word
                for word in itemQuery.searchStr.split():
                    wordPattern = ("^%(searchWord)s|[^a-z]%(searchWord)s" %
                                   {"searchWord": word})
                    search.addWhereOp("ci.description", "~*", wordPattern)

            if itemQuery.sourceTables:
                search.addWhereIn("cic.source_table", itemQuery.sourceTables)

            if itemQuery.analysisStatus is not None:
                search.addWhereEqual("ci.analysis_status",
                                     itemQuery.analysisStatus)
                # Also ignore items with no occurrence in the analyzed data
                # (happens if an item was accepted for analysis from a
                # multi-year dataset, but never used in a sub-time frame's
                # analysis)
                search.addWhere("ci.item_count <> 0")

            if itemQuery.sortField:
                search.addOrderBy(itemQuery.sortField)
            for sortColumn in ("cic.description", "ci.name", "ci.description"):
                search.addOrderBy(sortColumn)

            if itemQuery.resultCount is not None:
                search.limit = itemQuery.resultCount

            resultTable = DBUtil.execute(search,
                                         includeColumnNames=True,
                                         conn=conn)
            return modelListFromTable(resultTable)
        finally:
            if ownsConnection:
                conn.close()
示例#12
0
    def querySourceItems(self,
                         userSIDs,
                         limit=None,
                         offset=None,
                         progress=None,
                         conn=None):
        """Yield rows of self.sourceTableName one at a time as RowItemModels.

        Args:
            userSIDs: When not None, only rows whose user_id is in this
                collection are returned.
            limit: Passed to the query's setLimit.
            offset: Passed to the query's setOffset.
            progress: Optional tracker; when given, its ``total`` attribute
                is set from the query's totalQuery() before rows are read.
            conn: Optional open DB connection.  When omitted, one is taken
                from self.connFactory and closed once iteration finishes.

        Yields:
            RowItemModel built from each result row and the column headers.
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # try/finally is legal around yield since Python 2.5 and ensures the
        # cursor (and a self-opened connection) are released when the query
        # fails or the consumer abandons the generator early.
        try:
            # Column headers to query for that map to respective fields in
            # the analysis table
            headers = [
                "user_id", "user_name", "de_pat_id", "access_datetime",
                "metric_id", "metric_name", "line_count", "description",
                "metric_group_num", "metric_group_name"
            ]

            query = SQLQuery()
            for header in headers:
                query.addSelect(header)
            query.addFrom(self.sourceTableName)
            if userSIDs is not None:
                query.addWhereIn("user_id", userSIDs)
            query.setLimit(limit)
            query.setOffset(offset)

            # Query to get an estimate of how long the process will be
            if progress is not None:
                progress.total = DBUtil.execute(query.totalQuery(),
                                                conn=conn)[0][0]

            cursor = conn.cursor()
            try:
                # Do one massive query, but yield data for one item at a time.
                cursor.execute(str(query), tuple(query.params))

                row = cursor.fetchone()
                while row is not None:
                    yield RowItemModel(row, headers)
                    row = cursor.fetchone()
            finally:
                cursor.close()
        finally:
            # Only close connections this method opened itself.
            if not extConn:
                conn.close()
示例#13
0
def loadRespiratoryViralPanelItemIds(extractor):
    """Return the set of clinical_item_ids for respiratory panel lab items.

    Looks up the clinical_item_category_id of the category whose
    description is like 'Lab', then selects clinical items in that category
    with analysis_status 1 whose description matches the case-insensitive
    regex 'Respiratory.*Panel'.

    Args:
        extractor: Not used by this function.
    """
    categoryRows = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Lab'")
    labCategoryId = categoryRows[0][0]

    itemQuery = SQLQuery()
    itemQuery.addSelect("clinical_item_id")
    itemQuery.addFrom("clinical_item")
    itemQuery.addWhereEqual("analysis_status", 1)
    itemQuery.addWhereIn("clinical_item_category_id", (labCategoryId,))
    itemQuery.addWhere("description ~* '%s'" % 'Respiratory.*Panel')

    return set(itemRow[0] for itemRow in DBUtil.execute(itemQuery))
示例#14
0
    def _get_average_orders_per_patient(self):
        """Return the median number of lab orders per patient.

        The query counts order_proc_id per pat_id, restricted to the
        components returned by self._get_components_in_lab_panel().  Which
        tables and filters are used depends on LocalEnv.DATASET_SOURCE_NAME
        ('STRIDE' or 'UMich').

        Returns:
            numpy.median of the per-patient order counts.

        Raises:
            Exception: When the query returns no rows.
        """
        # Get number of results for this lab test per patient.
        query = SQLQuery()
        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':  #TODO: add STRIDE component routine
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            query.addWhereIn("proc_code", [self._lab_panel])
            components = self._get_components_in_lab_panel()
            query.addWhereIn("base_name", components)
            query.addGroupBy('pat_id')

        elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(order_proc_id) AS num_orders')
            query.addFrom('labs')
            query.addWhereIn(self._varTypeInTable, [self._lab_var])
            components = self._get_components_in_lab_panel()
            query.addWhereIn("base_name", components)
            query.addGroupBy('pat_id')
        # NOTE(review): for any other DATASET_SOURCE_NAME the query is
        # executed with no clauses at all - confirm that cannot happen.
        log.debug('Querying median orders per patient...')
        results = DBUtil.execute(query)

        # Column 1 of each row is the per-patient order count.
        order_counts = [row[1] for row in results]
        if not order_counts:
            error_msg = '0 orders for lab "%s."' % self._lab_var
            log.critical(error_msg)
            # Raise instead of sys.exit so that callers can catch the failure.
            raise Exception(error_msg)
        return numpy.median(order_counts)
示例#15
0
    def deactivateAnalysisByCount(self,
                                  thresholdInstanceCount,
                                  categoryIds=None,
                                  conn=None):
        """Deactivate clinical items whose instance count is at or below a threshold.

        Refreshes the clinical item counts, selects every clinical_item with
        item_count <= thresholdInstanceCount (optionally limited to the
        given categories, and to IDs below self.maxClinicalItemId when that
        is set), and passes the resulting ID set to self.deactivateAnalysis.

        Use data/analysis/queryItemCounts.py to help guide threshold
        selection with queries like:

            select count(clinical_item_id), sum(item_count)
            from clinical_item
            where item_count > %s
            and clinical_item_category_id in (%s)

            (and analysis_status = 1)?  Seems like a good filter, but the
            process itself will change this count

        A direct count is also possible as below, but that is usually for
        pre-processing before activations even start, whereas the query
        above counts records that have already gone through analysis:

            select clinical_item_id, count(distinct patient_id), count(distinct encounter_id), count(patient_item_id)
            from patient_item
            group by clinical_item_id

        Args:
            thresholdInstanceCount: Items with item_count less than or equal
                to this value are deactivated.
            categoryIds: Optional clinical_item_category_id values to
                restrict the selection to.
            conn: Optional open DB connection.  When omitted, one is taken
                from self.connFactory and closed before returning.
        """
        ownsConnection = conn is None
        if ownsConnection:
            conn = self.connFactory.connection()
        try:
            # Make sure clinical item instance (patient item) counts are
            # up to date before comparing against the threshold
            self.updateClinicalItemCounts(conn=conn)

            lowCountQuery = SQLQuery()
            lowCountQuery.addSelect("clinical_item_id")
            lowCountQuery.addFrom("clinical_item")
            if self.maxClinicalItemId is not None:
                # Restrict to limited / test data
                lowCountQuery.addWhereOp("clinical_item_id", "<",
                                         self.maxClinicalItemId)
            if categoryIds is not None:
                lowCountQuery.addWhereIn("clinical_item_category_id",
                                         categoryIds)
            lowCountQuery.addWhereOp("item_count", "<=",
                                     thresholdInstanceCount)

            lowCountRows = DBUtil.execute(lowCountQuery, conn=conn)
            idsToDeactivate = set(itemRow[0] for itemRow in lowCountRows)

            self.deactivateAnalysis(idsToDeactivate, conn=conn)
        finally:
            if ownsConnection:
                conn.close()
示例#16
0
def queryDemographics(patientDF, baseDate):
    """Left-merge demographic columns onto patientDF.

    Queries stride_patient for the IDs in patientDF["patientId"] and
    retrieves pat_id, an age computed as baseDate.year minus birth_year,
    gender and primary_race.

    Args:
        patientDF: DataFrame with a "patientId" column.
        baseDate: Object with a ``year`` attribute used as the age
            reference year.

    Returns:
        patientDF left-merged with a DataFrame whose columns are
        patientId, age, gender and race.
    """
    log.info("Populate demographics background for %d patients" % len(patientDF))

    demographicsQuery = SQLQuery()
    for selectExpr in ("pat_id",
                       "%d-birth_year as age" % baseDate.year,
                       "gender",
                       "primary_race"):
        demographicsQuery.addSelect(selectExpr)
    demographicsQuery.addFrom("stride_patient")
    demographicsQuery.addWhereIn("pat_id", patientDF["patientId"])

    demographicRows = DBUtil.execute(demographicsQuery)
    demographicsDF = pd.DataFrame(
        demographicRows, columns=["patientId", "age", "gender", "race"])
    return patientDF.merge(demographicsDF, how="left")
示例#17
0
def loadBloodCultureItemIds(extractor):
    """Return the set of clinical_item_ids for blood culture items.

    Looks up the clinical_item_category_id of the category whose
    description is like 'Microbiology', then selects clinical items in that
    category with analysis_status 1 whose description matches the
    case-insensitive regex 'Blood Culture'.

    Args:
        extractor: Not used by this function.
    """
    categoryRows = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like 'Microbiology'")
    microCategoryId = categoryRows[0][0]

    # Diagnostic tests indicating suspected infection / sepsis
    cultureQuery = SQLQuery()
    cultureQuery.addSelect("clinical_item_id")
    cultureQuery.addFrom("clinical_item")
    cultureQuery.addWhereEqual("analysis_status", 1)
    cultureQuery.addWhereIn("clinical_item_category_id", (microCategoryId,))
    cultureQuery.addWhere("description ~* '%s'" % 'Blood Culture')

    return set(itemRow[0] for itemRow in DBUtil.execute(cultureQuery))
示例#18
0
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    """Send patient item dates for the given items and patients to outputFile.

    Selects patient_id and item_date from patient_item for the given
    clinical item IDs and for the patient IDs that are keys of patientById,
    ordered by patient then item date.  The query is executed with column
    names included and the output is handed to a TextResultsFormatter
    wrapped around outputFile.

    Args:
        outputFile: Destination passed to TextResultsFormatter.
        clinicalItemIds: Clinical item IDs to match.
        patientById: Dictionary whose keys are the patient IDs to include.
    """
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    resultsFormatter = TextResultsFormatter(outputFile)

    itemQuery = SQLQuery()
    itemQuery.addSelect("patient_id")
    itemQuery.addSelect("item_date")
    itemQuery.addFrom("patient_item")
    itemQuery.addWhereIn("clinical_item_id", clinicalItemIds)
    itemQuery.addWhereIn("patient_id", patientById.viewkeys())
    itemQuery.addOrderBy("patient_id")
    itemQuery.addOrderBy("item_date")

    DBUtil.execute(itemQuery,
                   includeColumnNames=True,
                   formatter=resultsFormatter)
    def prepareItemAssociations(self, itemIdPairs, linkedItemIdsByBaseId,
                                conn):
        """Make sure all pair-wise item association records exist.

        Initializing the clinical_item_association rows up front means
        subsequent queries don't have to pause to check for their existence,
        which should greatly reduce the number of queries and execution
        time.

        Args:
            itemIdPairs: List of strings, each the textual form of an
                (itemId1, itemId2) pair.  The list is modified in place:
                every string is replaced by the pair it evaluates to.
            linkedItemIdsByBaseId: Passed through to
                self.acceptableClinicalItemIdPair to decide which pairs get
                a record.
            conn: Open DB connection used for all queries and inserts.
        """
        clinicalItemIdSet = set()
        # Convert the list of strings into a list of pairs, which is needed
        # for the rest of this function.
        # SECURITY NOTE(review): eval executes arbitrary code; if
        # itemIdPairs can ever come from an untrusted source this should be
        # replaced with a safe parser such as ast.literal_eval.
        for index, pair in enumerate(itemIdPairs):
            itemIdPairs[index] = eval(pair)

        for (itemId1, itemId2) in itemIdPairs:
            clinicalItemIdSet.add(itemId1)
            clinicalItemIdSet.add(itemId2)
        nItems = len(clinicalItemIdSet)

        # Now go through all needed item pairs and create default records as
        # needed
        log.debug("Ensure %d baseline records ready" % (nItems * nItems))
        for itemId1 in clinicalItemIdSet:
            # Query to see which ones already exist in the database.
            # Do this for each source clinical item instead of all
            # combinations to avoid excessive in-memory tracking
            query = SQLQuery()
            query.addSelect("clinical_item_id")
            query.addSelect("subsequent_item_id")
            query.addFrom("clinical_item_association")
            query.addWhereEqual("clinical_item_id", itemId1)
            query.addWhereIn("subsequent_item_id", clinicalItemIdSet)
            associationTable = DBUtil.execute(query, conn=conn)

            # Keep track in memory temporarily for rapid lookup
            existingItemIdPairs = set(tuple(row) for row in associationTable)

            for itemId2 in clinicalItemIdSet:
                itemIdPair = (itemId1, itemId2)
                if itemIdPair not in existingItemIdPairs and self.acceptableClinicalItemIdPair(
                        itemId1, itemId2, linkedItemIdsByBaseId):
                    defaultAssociation = RowItemModel(
                        itemIdPair, ("clinical_item_id", "subsequent_item_id"))
                    try:
                        # Optimistic insert of a new item pair; should be
                        # safe since just checked above, but parallel
                        # processes may collide
                        DBUtil.insertRow("clinical_item_association",
                                         defaultAssociation,
                                         conn=conn)
                    except conn.IntegrityError as err:
                        # Another process already created the record; note
                        # it and carry on with the remaining pairs.
                        log.warning(err)
示例#20
0
    def tearDown(self):
        """Remove the test records and test source table, then run the base tearDown."""
        log.info("Purge test records from the database")

        # Delete rows for the test users from access_log first, then from user.
        for tableName in ("access_log", "user"):
            purgeQuery = SQLQuery()
            purgeQuery.delete = True
            purgeQuery.addFrom(tableName)
            purgeQuery.addWhereIn("user_id", self.testUserIDs)
            DBUtil.execute(purgeQuery)

        DBUtil.execute("drop table %s;" % TEST_SOURCE_TABLE)
        DBTestCase.tearDown(self)
示例#21
0
    def _getAdmitDxPatientFrequencyRankByYear(self):
        """Query unique-patient counts per admit diagnosis and admit year.

        Builds and executes a query over patient_item joined to
        clinical_item, restricted to the admit diagnosis clinical item IDs.

        NOTE(review): in the visible code the query output is bound to the
        local ``results`` and nothing is returned, so callers receive None.
        The rest of this method may be missing - confirm against the
        original file.
        """
        # Get list of all clinical item IDs matching admit diagnosis.
        # Get this list in advance to make subsequent query run a bit faster.
        admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

        # Build query for # of unique patients.
        # SELECT
        #   ci.name AS icd_code,
        #   ci.description AS admit_dx,
        #   EXTRACT(YEAR FROM pi.item_date) AS admit_year,
        #   COUNT(DISTINCT pi.patient_id) AS num_unique_patients
        # FROM
        #   patient_item AS pi
        # JOIN
        #   clinical_item AS ci
        # ON
        #   pi.clinical_item_id = ci.clinical_item_id
        # WHERE
        #   ci.clinical_item_id in (admitDxClinicalItemIds)
        # GROUP BY
        #   icd_code,
        #   admit_dx,
        #   admit_year,
        #   num_unique_patients
        # ORDER BY
        #   icd_code,
        #   admit_year,
        #   num_unique_patients DESC
        query = SQLQuery()
        query.addSelect("ci.name AS icd_code")
        query.addSelect("ci.description AS admit_dx")
        query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year")
        query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients")
        query.addFrom("patient_item AS pi")
        query.addJoin("clinical_item AS ci",
                      "pi.clinical_item_id = ci.clinical_item_id")
        query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds)
        query.addGroupBy("icd_code")
        query.addGroupBy("admit_dx")
        query.addGroupBy("admit_year")
        # NOTE(review): num_unique_patients is the alias of a COUNT
        # aggregate; some SQL engines reject aggregates in GROUP BY -
        # confirm this runs on the target database.
        query.addGroupBy("num_unique_patients")
        query.addOrderBy("icd_code")
        query.addOrderBy("admit_year")
        query.addOrderBy("num_unique_patients DESC")

        # Execute query.
        results = DBUtil.execute(query)
    def _getPatientsComponentsHistories(self, item_ids):
        """Return patient item history rows for the given clinical item IDs.

        Selects patient_id, name and item_date from clinical_item joined to
        patient_item, restricted to item_ids and ordered by patient then
        item date.

        Returns:
            Whatever customDBUtil.execute returns for the query.
        """
        history_query = SQLQuery()

        for column in ('patient_id', 'name', 'item_date'):
            history_query.addSelect(column)

        for table in ('clinical_item as ci', 'patient_item as pi'):
            history_query.addFrom(table)

        history_query.addWhere('ci.clinical_item_id = pi.clinical_item_id')
        history_query.addWhereIn('ci.clinical_item_id', item_ids)

        for sort_column in ('patient_id', 'item_date'):
            history_query.addOrderBy(sort_column)

        rows = customDBUtil.execute(history_query)
        return rows
示例#23
0
    def _getPatientsLabsHistories(self, proc_codes):
        """Return finalized lab order rows for the given procedure codes.

        Selects pat_id, abnormal_yn, result_time and proc_code from
        stride_order_proc where lab_status equals 'Final result' and
        proc_code is one of proc_codes, ordered by procedure code, patient
        and result time.

        Returns:
            Whatever customDBUtil.execute returns for the query.
        """
        labs_query = SQLQuery()

        for column in ('pat_id', 'abnormal_yn', 'result_time', 'proc_code'):
            labs_query.addSelect(column)

        labs_query.addFrom('stride_order_proc')

        labs_query.addWhereEqual('lab_status', 'Final result')
        labs_query.addWhereIn('proc_code', proc_codes)

        for sort_column in ('proc_code', 'pat_id', 'result_time'):
            labs_query.addOrderBy(sort_column)

        rows = customDBUtil.execute(labs_query)
        return rows
示例#24
0
def add_resident_column(columns_order, csv, survey_file):
    """Merge survey responses into csv and register a 'resident' column.

    Steps:
      1. Read survey_file with pandas.
      2. Look up sim_user rows whose name appears in the survey's
         'Physician User Name' column.
      3. Attach sim_user_id to each survey row (merge on the user name).
      4. Merge the survey rows into csv, matching csv 'user' against
         'sim_user_id'.

    Both merges use pandas' default join type.

    Side effect: inserts 'resident' at index 1 of columns_order.
    NOTE(review): presumably 'resident' is a column of the survey file;
    nothing here creates it -- confirm against the survey schema.

    Returns the merged frame.
    """
    survey_frame = pd.read_csv(survey_file)
    physician_names = survey_frame['Physician User Name']

    # Fetch (sim_user_id, name) for every physician named in the survey.
    user_query = SQLQuery()
    for column in ("sim_user_id", "name"):
        user_query.addSelect(column)
    user_query.addFrom("sim_user")
    user_query.addWhereIn("name", physician_names)
    user_rows = DBUtil.execute(user_query)

    user_frame = pd.DataFrame(user_rows, columns=['sim_user_id', 'name'])
    survey_with_ids = pd.merge(
        survey_frame,
        user_frame,
        left_on='Physician User Name',
        right_on='name',
    )
    merged = pd.merge(
        csv,
        survey_with_ids,
        left_on='user',
        right_on='sim_user_id',
    )

    columns_order.insert(1, 'resident')
    return merged
示例#25
0
    def _get_random_patient_list(self):
        """Pick a random sample of patient IDs that have orders for self._component.

        The sample is drawn client-side with numpy so the database does not
        have to evaluate RANDOM().

        The number of patients is derived from the median number of orders
        per patient: self._num_requested_episodes divided by that median,
        with a floor of 1, and capped at the number of patients that actually
        have orders for the component.

        Side effects:
            Sets self._num_patients to the (uncapped) target patient count.
            Logs a critical message and terminates the process via sys.exit
            when no orders exist for the component.

        Returns:
            List of pat_id values, sampled without replacement.
        """
        # Count orders per patient for this lab component.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
        query.addFrom('stride_order_proc AS sop')
        query.addFrom('stride_order_results AS sor')
        query.addWhere('sop.order_proc_id = sor.order_proc_id')
        query.addWhereIn("base_name", [self._component])
        query.addGroupBy('pat_id')
        log.debug('Querying median orders per patient...')

        results = DBUtil.execute(query)

        if not results:
            error_msg = '0 orders for component "%s."' % self._component
            log.critical(error_msg)
            sys.exit('[ERROR] %s' % error_msg)

        # Despite the variable name, this is the median, not the mean.
        order_counts = [row[1] for row in results]
        avg_orders_per_patient = numpy.median(order_counts)
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)

        # Based on the typical number of orders per patient, figure out how
        # many patients are needed for a feature matrix of the requested size.
        self._num_patients = int(numpy.max(
            [self._num_requested_episodes / avg_orders_per_patient, 1]))

        # Some components have fewer associated patients than the target.
        patient_number_chosen = min(len(results), self._num_patients)
        inds_random_patients = numpy.random.choice(
            len(results), size=patient_number_chosen, replace=False)

        return [results[ind][0] for ind in inds_random_patients]
示例#26
0
def queryDrugScreens(patientDF, period, locations):
    """Add an integer "nDrugScreens" column to the patient frame.

    Counts distinct outpatient drug-screen orders per patient in
    stride_order_proc_drug_screen, restricted to the given patient
    locations, to the patients listed in patientDF["patientId"], and to
    ordering dates strictly between period[0] and period[-1].

    Args:
        patientDF: DataFrame with a "patientId" column.
        period: Sequence whose first and last items bound the ordering date
            (both bounds exclusive).
        locations: Patient location values to include.

    Returns:
        A new DataFrame: patientDF left-merged with the per-patient counts.
        Patients with no matching orders get nDrugScreens == 0.
    """
    log.info("Populate drug screens by primary locations")

    query = SQLQuery()
    query.addSelect("pat_id")
    query.addSelect("count(distinct order_proc_id)")
    query.addFrom("stride_order_proc_drug_screen")
    query.addWhere("ordering_mode = 'Outpatient'")
    query.addWhereIn("patient_location", locations)
    query.addWhereOp("ordering_date", ">", period[0])
    query.addWhereOp("ordering_date", "<", period[-1])
    query.addWhereIn("pat_id", patientDF["patientId"])
    query.addGroupBy("pat_id")

    results = DBUtil.execute(query)
    cols = ["patientId", "nDrugScreens"]
    newDF = pd.DataFrame(results, columns=cols)
    patientDF = patientDF.merge(newDF, how="left")

    # The left merge leaves NaN for patients without any drug screen.
    # Fill through a single column assignment rather than chained indexing
    # (df[col][mask] = 0), which pandas may apply to a temporary copy.
    # The merge with NaN forces a float column, so cast back to int last.
    patientDF["nDrugScreens"] = patientDF["nDrugScreens"].fillna(0).astype("int")
    return patientDF
示例#27
0
    def action_default(self):
        """Build option value/text lists of clinical items for the requested categories.

        Reads from self.requestData:
            clinical_item_category_id: comma-separated category ID(s).
            orderBy: sort expression for the query. Its first
                whitespace-separated token names the field shown first in
                each option text.

        Writes to self.requestData:
            optionValuesJSON: str() of the list of clinical_item_id strings.
            optionTextsJSON: str() of the list of " - "-joined display texts
                (order field first, then the remaining display fields).

        Raises:
            ValueError: if orderBy is anything other than a comma-separated
                list of column names, each with an optional ASC/DESC.
        """
        import re  # Local import: only needed for the orderBy check below.

        # Convert query category ID(s) into a list, even of size 1
        categoryIds = self.requestData["clinical_item_category_id"].split(",")

        # orderBy comes from the request and is handed to addOrderBy verbatim.
        # Sort terms cannot be bound as query parameters, so accept only plain
        # column names with an optional direction to avoid SQL injection.
        orderBy = self.requestData["orderBy"]
        sortTerm = r"[A-Za-z_][A-Za-z0-9_.]*(?:\s+(?:asc|desc))?"
        orderByPattern = r"\s*%s(?:\s*,\s*%s)*\s*\Z" % (sortTerm, sortTerm)
        if not re.match(orderByPattern, orderBy, re.IGNORECASE):
            raise ValueError("Invalid orderBy expression: %r" % (orderBy,))

        query = SQLQuery()
        query.addSelect("ci.clinical_item_id")
        query.addSelect("ci.name")
        query.addSelect("ci.description")
        query.addSelect("ci.item_count")
        query.addFrom("clinical_item as ci")
        query.addWhere("analysis_status = 1")
        # Restrict to the requested categories
        query.addWhereIn("ci.clinical_item_category_id", categoryIds)
        query.addOrderBy(orderBy)

        resultTable = DBUtil.execute(query, includeColumnNames=True)
        resultModels = modelListFromTable(resultTable)

        optionValues = []
        optionTexts = []

        displayFields = ("name", "description", "item_count")

        # The sort field leads each option text; it is the same for every
        # row, so work it out once instead of once per result.
        orderField = orderBy.split()[0]
        otherFields = [field for field in displayFields if field != orderField]

        for resultModel in resultModels:
            optionValues.append(str(resultModel["clinical_item_id"]))

            textValueList = [str(resultModel[orderField])]
            textValueList.extend(str(resultModel[field]) for field in otherFields)
            optionTexts.append(" - ".join(textValueList))

        # Conveniently, Python string representation coincides with JavaScript
        self.requestData["optionValuesJSON"] = str(optionValues)
        self.requestData["optionTextsJSON"] = str(optionTexts)
示例#28
0
 def loadUserInfo(self, userIds=None, conn=None):
     """Return id/name records for sim users.

     userIds: optional collection of sim_user_id values to restrict the
         query to; when None, no restriction is applied.
     conn: optional connection to run the query on; when None, one is
         obtained from self.connFactory and closed before returning.

     Returns the result of modelListFromTable applied to the query output
     (executed with includeColumnNames=True).
     """
     ownsConn = conn is None
     if ownsConn:
         conn = self.connFactory.connection()
     try:
         userQuery = SQLQuery()
         for column in ("su.sim_user_id", "su.name"):
             userQuery.addSelect(column)
         userQuery.addFrom("sim_user as su")
         if userIds is not None:
             userQuery.addWhereIn("su.sim_user_id", userIds)
         userTable = DBUtil.execute(
             userQuery, includeColumnNames=True, conn=conn)
         return modelListFromTable(userTable)
     finally:
         # Only close a connection opened here; a caller-supplied one stays open.
         if ownsConn:
             conn.close()
示例#29
0
    def fetch_components_in_panel(lab_panel):
        """Return the distinct base_name values recorded for a lab panel.

        Args:
            lab_panel: proc_code identifying the panel in stride_order_proc.

        Returns:
            List of base_name values from stride_order_results for the
            panel's orders; an empty list when the panel has no orders.
        """
        # Doing a single query results in a sequential scan through
        # stride_order_results. To avoid this, break up the query in two.
        # First, get all the order_proc_ids for proc_code.
        query = SQLQuery()
        query.addSelect('order_proc_id')
        query.addFrom('stride_order_proc')
        query.addWhereIn('proc_code', [lab_panel])
        query.addGroupBy('order_proc_id')
        lab_order_ids = [row[0] for row in DBUtil.execute(query)]

        # With no orders there is nothing to look up. An empty IN list can
        # never match any row (and is not valid SQL in some databases), so
        # skip the second query entirely.
        if not lab_order_ids:
            return []

        # Second, get all base_names from those orders.
        query = SQLQuery()
        query.addSelect('base_name')
        query.addFrom('stride_order_results')
        query.addWhereIn('order_proc_id', lab_order_ids)
        query.addGroupBy('base_name')
        return [row[0] for row in DBUtil.execute(query)]
示例#30
0
 def loadStateInfo(self, stateIds=None, conn=None):
     """Return id/name/description records for sim patient states.

     stateIds: optional collection of sim_state_id values to restrict the
         query to; when None, no restriction is applied.
     conn: optional connection to run the query on; when None, one is
         obtained from self.connFactory and closed before returning.

     Returns the result of modelListFromTable applied to the query output
     (executed with includeColumnNames=True).
     """
     ownsConn = conn is None
     if ownsConn:
         conn = self.connFactory.connection()
     try:
         stateQuery = SQLQuery()
         for column in ("ss.sim_state_id", "ss.name", "ss.description"):
             stateQuery.addSelect(column)
         stateQuery.addFrom("sim_state as ss")
         if stateIds is not None:
             stateQuery.addWhereIn("ss.sim_state_id", stateIds)
         stateTable = DBUtil.execute(
             stateQuery, includeColumnNames=True, conn=conn)
         return modelListFromTable(stateTable)
     finally:
         # Only close a connection opened here; a caller-supplied one stays open.
         if ownsConn:
             conn.close()