Exemplo n.º 1
0
    def updateClinicalItemCounts(self, acceptCache=False, conn=None):
        """Refresh the clinical_item summary columns (item_count,
        patient_count, encounter_count) from clinical_item_association rows.

        If acceptCache is True, first check the data_cache table for a
        "clinicalItemCountsUpdated" entry; when present, assume the counts
        were already recalculated and return without doing any work.
        """
        externalConnection = conn is not None
        if not externalConnection:
            conn = self.connFactory.connection()
        try:
            if acceptCache and self.getCacheData("clinicalItemCountsUpdated", conn=conn) is not None:
                # Counts were refreshed previously; skip the recalculation
                return

            # Zero out all existing counts before repopulating them
            resetQuery = "update clinical_item set item_count = 0, patient_count = 0, encounter_count = 0 "
            resetParams = []
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                resetQuery += "where clinical_item_id < %s" % DBUtil.SQL_PLACEHOLDER
                resetParams.append(self.maxClinicalItemId)
            DBUtil.execute(resetQuery, resetParams, conn=conn)

            # The primary summary stats live on the "diagonal" of the
            # association matrix (item paired with itself)
            countQuery = SQLQuery()
            countQuery.addSelect("clinical_item_id")
            countQuery.addSelect("count_0 as item_count")
            countQuery.addSelect("patient_count_0 as patient_count")
            countQuery.addSelect("encounter_count_0 as encounter_count")
            countQuery.addFrom("clinical_item_association as ci")
            countQuery.addWhere("clinical_item_id = subsequent_item_id")
            if self.maxClinicalItemId is not None:  # Restrict to (test) data
                countQuery.addWhereOp("clinical_item_id", "<", self.maxClinicalItemId)

            countTable = DBUtil.execute(countQuery,
                                        includeColumnNames=True,
                                        conn=conn)
            for countModel in modelListFromTable(countTable):
                DBUtil.updateRow("clinical_item",
                                 countModel,
                                 countModel["clinical_item_id"],
                                 conn=conn)

            # Leave a marker so later acceptCache calls can skip this work
            self.setCacheData("clinicalItemCountsUpdated", "True", conn=conn)
        finally:
            if not externalConnection:
                conn.close()
Exemplo n.º 2
0
 def clinicalItemFromSourceItem(self, sourceItem, category, conn):
     """Return the clinical_item record model for the given sourceItem,
     loading or persisting it on a local-cache miss.

     On a cache hit, opportunistically replace the stored description when
     the incoming one is shorter or the prior one is a generic template.
     """
     itemKey = (category["clinical_item_category_id"], sourceItem["code"])
     cache = self.clinicalItemByCategoryIdCode
     if itemKey in cache:
         # Already known locally; check for redundancies and opportunities
         #   to simplify different descriptions for the same medication
         knownItem = cache[itemKey]
         oldDescription = knownItem["description"]
         newDescription = sourceItem["med_description"]
         if len(newDescription) < len(oldDescription) or oldDescription.startswith(TEMPLATE_MEDICATION_PREFIX):
             # Prior recorded description was either a generic template or a
             #   longer version than necessary; adopt the current one
             knownItem["description"] = newDescription
             DBUtil.updateRow("clinical_item",
                              knownItem,
                              knownItem["clinical_item_id"],
                              conn=conn)
     else:
         # Not in the local cache. Check the database table, persisting a
         #   new record if it is absent there too
         newItem = RowItemModel({
             "clinical_item_category_id": category["clinical_item_category_id"],
             "external_id": sourceItem["medication_id"],
             "name": sourceItem["code"],
             "description": sourceItem["med_description"],
         })
         (newItemId, isNew) = DBUtil.findOrInsertItem("clinical_item",
                                                      newItem,
                                                      conn=conn)
         newItem["clinical_item_id"] = newItemId
         cache[itemKey] = newItem
     return cache[itemKey]
Exemplo n.º 3
0
    def signOrders(self,
                   userId,
                   patientId,
                   currentTime,
                   orderItemIds,
                   discontinuePatientOrderIds=None,
                   conn=None):
        """Commit new order item IDs for the given patient and starting now,
        and discontinue (set end date) for any existing orders specified.

        Record any patient state transitions the orders would trigger.

        Parameters:
            userId: sim_user_id placing the orders
            patientId: sim_patient_id the orders apply to
            currentTime: relative time stamped on order starts/ends
            orderItemIds: clinical_item_ids to order now (duplicates ignored)
            discontinuePatientOrderIds: optional sim_patient_order ids to end
            conn: optional externally managed database connection
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Denormalized recording of current patient state to facilitate
            #   easy retrieval linked to orders later
            patientInfo = self.loadPatientInfo([patientId],
                                               currentTime,
                                               conn=conn)[0]
            stateId = patientInfo["sim_state_id"]
            postStateIdByItemId = patientInfo["postStateIdByItemId"]

            # Ensure unique and facilitate set operations
            orderItemIdSet = set(orderItemIds)

            insertDict = {
                "sim_user_id": userId,
                "sim_patient_id": patientId,
                "sim_state_id": stateId,
                "relative_time_start": currentTime
            }
            for itemId in orderItemIdSet:
                insertDict["clinical_item_id"] = itemId
                DBUtil.insertRow("sim_patient_order", insertDict, conn=conn)

            # See if any of these new orders triggered state transitions.
            # set(dict) & set works on both Python 2 and 3, unlike the
            # Python 2-only dict.viewkeys() originally used here.
            triggerItemIds = set(postStateIdByItemId) & orderItemIdSet
            while triggerItemIds:  # Found a trigger item
                if len(triggerItemIds) > 1:
                    # Found multiple. Weird. Arbitrarily act on the one that
                    #   appeared first in the input list
                    triggerItemId = next(itemId for itemId in orderItemIds
                                         if itemId in triggerItemIds)
                else:
                    triggerItemId = triggerItemIds.pop()
                postStateId = postStateIdByItemId[triggerItemId]

                # Record the state transition
                self.recordStateTransition(patientId,
                                           stateId,
                                           postStateId,
                                           currentTime,
                                           conn=conn)

                # Reload patientInfo to reflect new patient state
                patientInfo = self.loadPatientInfo([patientId],
                                                   currentTime,
                                                   conn=conn)[0]
                stateId = patientInfo["sim_state_id"]
                postStateIdByItemId = patientInfo["postStateIdByItemId"]

                # Don't keep looking for this one,
                #   important to avoid infinite loop
                orderItemIdSet.discard(triggerItemId)
                triggerItemIds = set(postStateIdByItemId) & orderItemIdSet

            if discontinuePatientOrderIds is not None:
                updateDict = {"relative_time_end": currentTime}
                for patientOrderId in discontinuePatientOrderIds:
                    DBUtil.updateRow("sim_patient_order",
                                     updateDict,
                                     patientOrderId,
                                     conn=conn)

            # Commit only after all inserts/updates succeeded. The original
            #   committed inside the finally clause, which persisted partial
            #   work even when an exception aborted the method midway.
            conn.commit()
        finally:
            if not extConn:
                conn.close()
Exemplo n.º 4
0
 def updateMetricDescriptionLines(self):
     """Collapse each metric's accumulated description lines into one
     space-separated string and persist it to the metric table.
     """
     for (metricId, lineList) in self.metricLineDescriptionsById.items():
         joinedDescription = ' '.join(lineList)
         DBUtil.updateRow("metric", {"description": joinedDescription}, metricId)
Exemplo n.º 5
0
    def test_analyzePatientItems(self):
        """Regression-test incremental AssociationAnalysis updates.

        Drives analyzePatientItems through a sequence of incremental runs
        (date-filtered, partial-patient, expanded item set, split patient
        data, no-op repeat, then full completion) and verifies the
        clinical_item_association count columns after each pass: first
        unique patient-level counts, then non-unique counts, patient-level
        counts, and encounter-level counts.  All mock items use negative ids.
        """
        # Run the association analysis against the mock test data above and verify
        #   expected stats afterwards.

        # Unique patient-level counts for the mock (negative id) item pairs
        associationQuery = \
            """
            select
                clinical_item_id, subsequent_item_id,
                patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800,
                patient_count_2592000, patient_count_7776000, patient_count_31536000,
                patient_count_any,
                patient_time_diff_sum, patient_time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """

        log.debug("Use incremental update, including date filters to start.")
        analysisOptions = AnalysisOptions()
        analysisOptions.patientIds = [-22222, -33333]
        analysisOptions.startDate = datetime(2000, 1, 9)
        analysisOptions.endDate = datetime(2000, 2, 11)
        self.analyzer.analyzePatientItems(analysisOptions)

        # Expected row format mirrors associationQuery's select column order
        expectedAssociationStats = \
            [
                [-11,-11,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [-11, -6,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -6,-11,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -6, -6,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
            ]

        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        log.debug(
            "Use incremental update, only doing the update based on a part of the data."
        )
        analysisOptions = AnalysisOptions()
        analysisOptions.patientIds = [-22222, -33333]
        self.analyzer.analyzePatientItems(analysisOptions)

        expectedAssociationStats = \
            [
                [-11,-11,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
                [-11, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-11, -6,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7,-11,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],
                [ -7, -7,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7, -6,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],

                [ -6,-11,   1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
                [ -6, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -6, -6,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
            ]

        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        log.debug(
            "Expand incremental update, by now including additional clinical items whose analysis status previously excluded them."
        )
        # Re-enable analysis for item -2 so the next pass picks it up
        DBUtil.updateRow("clinical_item", {"analysis_status": 1}, -2)
        analysisOptions.patientIds = [-22222, -33333]
        self.analyzer.analyzePatientItems(analysisOptions)

        expectedAssociationStats = \
            [
                [-11,-11,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
                [-11, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-11, -6,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [-11, -2,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7,-11,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],
                [ -7, -7,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7, -6,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],

                [ -6,-11,   1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
                [ -6, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -6, -6,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],

                [ -2,-11,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -2, -2,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
            ]

        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        log.debug(
            "Incremental update that includes a single patient data being split, so have to account for all of those dependencies"
        )

        # Inject one extra patient_item row for patient -33333 so prior
        #   analysis for that patient must be revisited on the next pass
        headers = [
            "patient_item_id", "encounter_id", "patient_id",
            "clinical_item_id", "item_date"
        ]
        dataModels = \
            [
                RowItemModel( [-1111, -334, -33333, -3,  datetime(2000, 2,11, 8)], headers ),
            ]
        for dataModel in dataModels:
            (dataItemId,
             isNew) = DBUtil.findOrInsertItem("patient_item", dataModel)

        analysisOptions.patientIds = [-22222, -33333]
        self.analyzer.analyzePatientItems(analysisOptions)

        expectedAssociationStats = \
            [
                [-11,-11,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
                [-11, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-11, -6,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [-11, -3,   0, 0, 1, 1, 1, 1, 1, 1,  28800.0, 829440000.0],
                [-11, -2,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7,-11,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],
                [ -7, -7,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -7, -6,   0, 0, 0, 1, 1, 1, 1, 1,  345600.0, 119439360000.0],

                [ -6,-11,   1, 1, 1, 2, 2, 2, 2, 2,  172800.0, 29859840000.0],
                [ -6, -7,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -6, -6,   2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
                [ -6, -3,   0, 0, 0, 1, 1, 1, 1, 1,  201600.0, 40642560000.0],

                [ -3,-11,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -3, -6,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -3, -3,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -3, -2,   0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],

                [ -2,-11,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -2, -3,   0, 0, 1, 1, 1, 1, 1, 1,  28800.0, 829440000.0],
                [ -2, -2,   1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
            ]
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        log.debug(
            "Negative test case, repeating analysis should not change any results"
        )
        analysisOptions.patientIds = [-22222, -33333]
        self.analyzer.analyzePatientItems(analysisOptions)
        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        log.debug("Complete the remaining incremental update")
        analysisOptions.patientIds = [-11111, -22222, -33333]
        self.analyzer.analyzePatientItems(analysisOptions)

        expectedAssociationStats = \
            [   # Note that sum-squares in particular gets rounded off due to large values
                [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-12,  -8, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-12,  -4, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2,  0.0, 0.0],
                [-11,  -7, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [-11,  -6, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [-11,  -3, 0, 0, 1, 1, 1, 1, 1, 1,  28800.0, 829440000.0],
                [-11,  -2, 1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [-10, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2678400.0, 7173830000000.0],    # Longer time diff?
                [-10, -10, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [-10,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                [-10,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2671200.0, 7135310000000.0],     # Longer diff
                [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
                [ -8,  -8, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -8,  -4, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                [ -7,  -7, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -7,  -6, 0, 0, 0, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 172800.0, 29859840000.0],
                [ -6,  -7, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -6,  -6, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                [ -6,  -3, 0, 0, 0, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
                [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 2678400.0, 7173830000000.0], # ???
                [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -4,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                [ -4,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -3,  -6, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -3,  -3, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                [ -3,  -2, 0, 0, 0, 0, 0, 0, 0, 0,  0.0, 0.0],
                [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1,  0.0, 0.0],
                [ -2,  -3, 0, 0, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
                [ -2,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0]
            ]

        associationStats = DBUtil.execute(associationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # Check the association stats for non-unique counts as well (allowing for repeats)
        nonUniqueAssociationQuery = \
            """
            select
                clinical_item_id, subsequent_item_id,
                count_0, count_3600, count_86400, count_604800,
                count_2592000, count_7776000, count_31536000, count_126144000,
                count_any,
                time_diff_sum, time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """
        expectedAssociationStats = \
            [   # Note that sum-squares in particular gets rounded off due to large values
                 [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [-11,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11,  -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-11,  -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
                 [-11,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-10, -12, 0, 0, 0, 0, 1, 2, 2, 2, 2, 5270400.0, 13892300000000.0],
                 [-10, -10, 2, 2, 3, 3, 3, 3, 3, 3, 3, 86400.0, 7464960000.0],
                 [-10,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [-10,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2671200.0, 7135310000000.0],
                 [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
                 [ -8,  -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -7,  -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -7,  -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
                 [ -6,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -6,  -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [ -6,  -3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
                 [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 7173830000000.0],
                 [ -4, -10, 1, 1, 2, 2, 2, 2, 2, 2, 2, 86400.0, 7464960000.0],
                 [ -4,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [ -4,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3,  -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -2,  -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
                 [ -2,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            ]
        associationStats = DBUtil.execute(nonUniqueAssociationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # Again for patient level counts
        patientAssociationQuery = \
            """
            select
                clinical_item_id, subsequent_item_id,
                patient_count_0, patient_count_3600, patient_count_86400, patient_count_604800,
                patient_count_2592000, patient_count_7776000, patient_count_31536000, patient_count_126144000,
                patient_count_any,
                patient_time_diff_sum, patient_time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """
        expectedAssociationStats = \
            [   # Note that sum-squares in particular gets rounded off due to large values
                 [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [-11,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11,  -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-11,  -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
                 [-11,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-10, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 2678400.0*2678400.0],    # Main difference.  Duplicates within a single patient, only count once
                 [-10, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],   # Don't count duplicates
                 [-10,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [-10,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2671200.0, 7135310000000.0],
                 [ -8, -10, 0, 0, 1, 1, 1, 1, 1, 1, 1, 79200.0, 6272640000.0],
                 [ -8,  -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -7,  -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -7,  -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
                 [ -6,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -6,  -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [ -6,  -3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 201600.0, 40642600000.0],
                 [ -4, -12, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2678400.0, 7173830000000.0],
                 [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],  # Don't count repeats
                 [ -4,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [ -4,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3,  -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -2,  -3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 28800.0, 829440000.0],
                 [ -2,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            ]
        associationStats = DBUtil.execute(patientAssociationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)

        # Again for encounter level counts
        encounterAssociationQuery = \
            """
            select
                clinical_item_id, subsequent_item_id,
                encounter_count_0, encounter_count_3600, encounter_count_86400, encounter_count_604800,
                encounter_count_2592000, encounter_count_7776000, encounter_count_31536000, encounter_count_126144000,
                encounter_count_any,
                encounter_time_diff_sum, encounter_time_diff_sum_squares
            from
                clinical_item_association
            where
                clinical_item_id < 0
            order by
                clinical_item_id, subsequent_item_id
            """
        expectedAssociationStats = \
            [   # Note that sum-squares in particular gets rounded off due to large values
                 [-12, -12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-12, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-12,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11, -11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [-11,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [-11,  -6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-11,  -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],   # No longer related in separate encounters
                 [-11,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [-10, -12, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2592000.0, 2592000.0*2592000.0],    # Only count the relation within a common encounter
                 [-10, -10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],   # Now count for separate encounters
                 [-10,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [-10,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0], # No longer related in separate encounters
                 [ -8, -10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],  # No longer related in separate encounters
                 [ -8,  -8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -8,  -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -7, -11, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -7,  -7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -7,  -6, 0, 0, 0, 1, 1, 1, 1, 1, 1, 345600.0, 119439000000.0],
                 [ -6, -11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 172800.0, 29859800000.0],
                 [ -6,  -7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -6,  -6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0.0, 0.0],
                 [ -6,  -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],   # No longer related in separate encounters
                 [ -4, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0], # No longer related in separate encounters
                 [ -4, -10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -4,  -8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 7200.0, 51840000.0],
                 [ -4,  -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -3,  -3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -3,  -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],
                 [ -2, -11, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
                 [ -2,  -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0],   # No longer related in separate encounters
                 [ -2,  -2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.0, 0.0],
            ]
        associationStats = DBUtil.execute(encounterAssociationQuery)
        self.assertEqualTable(expectedAssociationStats,
                              associationStats,
                              precision=3)
Exemplo n.º 6
0
    def mergeRelated(self,
                     baseClinicalItemId,
                     clinicalItemIds,
                     reassignMergedItems=True,
                     conn=None):
        """The specified clinical items will be merged / composited into the base clinical item provided.
        The remaining now redundant items will be deactivated
        Patient_item instances will be reassigned to the merged clinical_item
        (while backup links will be saved to backup_link_patient_item),
        clinical_item_association counts for the redundant items will removed and analyze_dates reset,
        requiring a re-run of AssociationAnalysis to redo those counts from scratch
        (but will now count as the merged / composite item rather than separate ones).

        Could theoretically figure out how to combine the association stats without re-running analysis, but
            patient_counts are supposed to ignore duplicates, so hard to know how to aggregate stats
            (not enough info in them to tell if unique cooccurrences?)

        Examples this could be relevant for:
        All blood transfusion indexes, G vs J vs Feeding tube equivalent, Ear, Eyes med routes irrelevant which ear/eye.
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            # Deactivate other items
            deactivateIds = set(clinicalItemIds)
            deactivateIds.discard(baseClinicalItemId)
            self.deactivateAnalysis(deactivateIds, conn=conn)

            # Build composite item name and description
            allIds = set(deactivateIds)
            allIds.add(baseClinicalItemId)

            query = SQLQuery()
            query.addSelect("clinical_item_id")
            query.addSelect("name")
            query.addSelect("description")
            query.addFrom("clinical_item")
            query.addWhereIn("clinical_item_id", allIds)
            query.addOrderBy("name")
            # Ensure consistency across multiple runs
            results = DBUtil.execute(query, conn=conn)

            nameList = list()
            descrList = list()
            # First pass to get Base Item Description
            for (clinicalItemId, name, description) in results:
                if clinicalItemId == baseClinicalItemId:
                    if name is None:
                        name = ""
                    if description is None:
                        description = ""
                    nameList.append(name)
                    descrList.append(description)
                    break
            # Second pass to get the rest
            for (clinicalItemId, name, description) in results:
                if clinicalItemId != baseClinicalItemId:
                    if name is None:
                        name = ""
                    if description is None:
                        description = ""
                    nameList.append(name)
                    descrList.append(description)
            compositeName = str.join("+", nameList)
            compositeDescription = str.join("+", descrList)

            DBUtil.updateRow("clinical_item", {
                "name": compositeName,
                "description": compositeDescription
            },
                             baseClinicalItemId,
                             conn=conn)

            if reassignMergedItems:
                # Reassign other items to the base item, but save backup data first
                query = SQLQuery()
                query.addSelect("patient_item_id")
                query.addSelect("clinical_item_id")
                query.addFrom("patient_item")
                query.addWhereIn("clinical_item_id", deactivateIds)
                results = DBUtil.execute(query, conn=conn)

                insertQuery = DBUtil.buildInsertQuery(
                    "backup_link_patient_item",
                    ["patient_item_id", "clinical_item_id"])
                for (patientItemId, clinicalItemId) in results:
                    insertParams = (patientItemId, clinicalItemId)
                    try:
                        # Optimistic insert of a new unique item
                        DBUtil.execute(insertQuery, insertParams, conn=conn)
                    except conn.IntegrityError, err:
                        # If turns out to be a duplicate, okay, just note it and continue to insert whatever else is possible
                        log.info(err)
                        pass

                # Now to actual reassignment of patient items to the unifying base clinical item
                placeholders = generatePlaceholders(len(deactivateIds))
                query = "update patient_item set clinical_item_id = %s where clinical_item_id in (%s)" % (
                    DBUtil.SQL_PLACEHOLDER, placeholders)
                params = [baseClinicalItemId]
                params.extend(deactivateIds)
                DBUtil.execute(query, params, conn=conn)

        finally:
            if not extConn:
                conn.close()
Exemplo n.º 7
0
    def normalizeMedIngredients(self, rxcuiDataByMedId, rowModel, convOptions, conn=None):
        """Given a rowModel of medication data, normalize it further.
        Specifically, look for common active ingredients to simplify the data.
        If the medication is actually a compound of multiple active ingredients,
        then break out into active ingredients.

        If convOptions.normalizeMixtures is set, will yield out multiple items to reflect each active ingredient.
        If not set, will yield a single item with description being a composite of the active ingredients,
        unless the mixture size exceeds convOptions.maxMixtureCount, in which case the generic
        medication data is yielded unchanged.

        Parameters:
        rxcuiDataByMedId - Dict keyed by medication_id, mapping to {rxcui: (ingredient, theraClass)}
        rowModel - Medication data row to normalize (may be mutated in place before being yielded)
        convOptions - Conversion options object (normalizeMixtures, maxMixtureCount attributes read here)
        conn - Optional existing database connection; if None, one is created here and
            closed when the generator completes (or is abandoned / raises)
        """
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        try:
            medId = rowModel["medication_id"]

            if medId not in rxcuiDataByMedId:
                # No mapping entry found, just use the available generic medication data then
                rowModel["code"] = GENERIC_CODE_TEMPLATE % rowModel["medication_id"]
                yield rowModel
            else:
                # Mapping entry found, yield a normalized model for each active ingredient found
                #   (will usually be a 1-to-1 relation, but sometimes multiple)
                ingredientTheraClassByRxcui = rxcuiDataByMedId[medId]
                if len(ingredientTheraClassByRxcui) <= 1 or convOptions.normalizeMixtures:
                    # Single ingredient or want component active ingredients to each have one record.
                    # Sort by RxCUI so yield order is deterministic across runs
                    # (matches the explicit ordering done for the composite branch below).
                    for (rxcui, (ingredient, theraClass)) in sorted(ingredientTheraClassByRxcui.items()):
                        # ~250/15000 RxCUI's don't have a defined active ingredient.
                        if ingredient is None:
                            continue

                        normalizedModel = RowItemModel(rowModel)
                        normalizedModel["medication_id"] = rxcui
                        normalizedModel["code"] = RXCUI_CODE_TEMPLATE % rxcui
                        normalizedModel["description"] = ingredient.title()

                        yield normalizedModel
                elif convOptions.maxMixtureCount is not None and len(ingredientTheraClassByRxcui) > convOptions.maxMixtureCount:
                    # Plan to denormalize, but excessively large mixture.  Forget it.
                    rowModel["code"] = GENERIC_CODE_TEMPLATE % rowModel["medication_id"]
                    yield rowModel
                else:
                    # Mixture of multiple ingredients and want to keep denormalized.
                    # Extract out the active ingredient names to make a composite based only on that unique combination.
                    ingredientRxcuiList = [(ingredient, rxcui) for (rxcui, (ingredient, theraClass)) in ingredientTheraClassByRxcui.items()]
                    ingredientRxcuiList.sort()  # Ensure consistent order

                    ingredientList = list()
                    for (ingredient, rxcui) in ingredientRxcuiList:
                        # ~250/15000 RxCUI's don't have a defined active ingredient.
                        if ingredient is None:
                            continue
                        ingredientList.append(ingredient.title())
                    ingredientComposite = str.join("-", ingredientList)

                    # Stick to the existing medication ID (rather than hashing the RxCUI combination)
                    # so the code stays stable for this medication across runs
                    rowModel["code"] = GENERIC_CODE_TEMPLATE % medId
                    rowModel["description"] = ingredientComposite
                    yield rowModel

                # Do some extra work here to see if we can figure out therapeutic / pharmaceutical class labels based on available data
                if rowModel["thera_class"] is not None:
                    theraClassNeedsPopulation = False
                    for (rxcui, (ingredient, theraClass)) in ingredientTheraClassByRxcui.items():
                        if theraClass is None:
                            # Don't have a previously populated class label for this RxCUI, but just found it with this data.  Populate then.
                            theraClass = rowModel["thera_class"]
                            ingredientTheraClassByRxcui[rxcui] = (ingredient, theraClass)
                            theraClassNeedsPopulation = True
                    if theraClassNeedsPopulation:
                        rowDict = {"thera_class": rowModel["thera_class"], "pharm_class": rowModel["pharm_class"], "pharm_subclass": rowModel["pharm_subclass"],}
                        DBUtil.updateRow("stride_mapped_meds", rowDict, medId, idCol="medication_id", conn=conn)
        finally:
            # Release the locally created connection even if the caller abandons the
            # generator or an exception occurs mid-iteration (previously it was only
            # closed on normal exhaustion, leaking the connection on error paths).
            # Externally supplied connections are left for the caller to manage.
            if not extConn:
                conn.close()