Пример #1
0
    def test_recommender(self):
        """Run the recommender against the mock test data and verify the ranked scores.

        A single RecommenderQuery object is reused and mutated from scenario to
        scenario, so any setting that is not explicitly reset carries over into the
        next scenario.  Each scenario logs its description and then compares the
        recommender output with the expected (clinical_item_id, score) rows, which
        are listed in the expected rank order.
        """
        headers = ["clinical_item_id", "score"]

        def toRows(idScorePairs):
            # Wrap plain (item ID, score) pairs into RowItemModel records that share the headers above
            return [RowItemModel([itemId, score], headers) for (itemId, score) in idScorePairs]

        def dropIds(idScorePairs, droppedIds):
            # Same ranking, minus the items expected to be missing from the results
            return [pair for pair in idScorePairs if pair[0] not in droppedIds]

        def verify(idScorePairs):
            # The expectation is built first, then the query is run and compared
            expectedData = toRows(idScorePairs)
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # queryItemIds, excludeItemIds, categoryIds and timeDeltaMax are left at their defaults here.
        query = RecommenderQuery()
        query.sortField = "tf"
        # Limit exceeds the short test item list, so expected values can be calculated for every item
        query.limit = 16
        # Artificial constraint to focus only on test data
        query.maxRecommendedId = 0

        # Expected ranking when no usable query item is given
        highScore = 2.0/13
        lowScore = 1.0/13
        generalRanking = (
            [(itemId, highScore) for itemId in (-2, -5, -6)] +
            [(itemId, lowScore) for itemId in (-1, -3, -7, -8, -10, -11, -12, -13, -14, -15)]
        )
        # Items expected to disappear under the category exclusion used below
        # (presumably they belong to the excluded categories in the fixture data -- not visible here)
        categoryFilteredIds = set([-2, -1, -3, -14, -15])

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        verify(generalRanking)

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = {-100}
        verify(generalRanking)

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = {-100}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        verify(dropIds(generalRanking, categoryFilteredIds))

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = {-100}
        query.excludeItemIds = {-6, -10}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        verify(dropIds(generalRanking, categoryFilteredIds | set([-6, -10])))

        # Expected "tf" scores once real query items (-2, -5) are supplied.
        # The query items themselves (-2, -5) are not expected among the results.
        tfBoth = (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)
        tfHigh = (1.0/6)*(2.0/2)
        tfMid = (1.0/4)*(1.0/2)
        tfLow = (1.0/6)*(1.0/2)
        tfRanking = (
            [(-6, tfBoth)] +
            [(itemId, tfHigh) for itemId in (-3, -7, -8)] +
            [(itemId, tfMid) for itemId in (-14, -15)] +
            [(itemId, tfLow) for itemId in (-1, -10, -11, -12, -13)]
        )

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        verify(tfRanking)

        log.debug("General query with category limit")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = {-2, -4, -5, -6}
        verify(dropIds(tfRanking, set([-6, -7, -8, -14, -15])))

        log.debug("General query with specific exclusion")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = {-4, -3, -2}
        query.excludeCategoryIds = set()
        verify(dropIds(tfRanking, set([-3])))

        log.debug("General query, sort by TF*IDF lift.")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.sortField = "lift"
        # Same tf values as above, each scaled by a per-item factor; item -6 uses a
        # smaller factor and therefore ranks below the tfHigh group here.
        liftRanking = (
            [(itemId, (13.0/1)*tfHigh) for itemId in (-3, -7, -8)] +
            [(-6, (13.0/2)*tfBoth)] +
            [(itemId, (13.0/1)*tfMid) for itemId in (-14, -15)] +
            [(itemId, (13.0/1)*tfLow) for itemId in (-1, -10, -11, -12, -13)]
        )
        verify(liftRanking)
Пример #2
0
    def action_default(self):
        """Look for related orders by association / recommender methods.

        Reads its parameters from self.requestData, runs an ItemAssociationRecommender
        query and stores the outcome back into self.requestData:
        "fieldHeaders" (from prepareDisplayHeaders) and "dataRows" (the result rows
        rendered as newline-joined HTML).  When a simulated patient is specified,
        that patient's recent items are excluded from the recommendations and are
        used as the query items if none were given explicitly.
        """
        # Recent item IDs (orders, diagnoses, unlocked results, etc.) of the simulated patient, if any
        if self.requestData["sim_patient_id"]:
            simPatientId = int(self.requestData["sim_patient_id"])
            simClock = int(self.requestData["sim_time"])
            simManager = SimManager()
            recentItemIds = simManager.recentItemIds(simPatientId, simClock)
        else:
            recentItemIds = set()

        # Recommender instance to query; share the web data cache to speed up rapid successive queries
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        # Default sort field when none is requested.
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        if self.requestData["sortField"] == "":
            self.requestData["sortField"] = "P-YatesChi2-NegLog"

        query = RecommenderQuery()
        query.parseParams(self.requestData)
        # Fall back on the recommender's default exclusions when the request specified none
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()

        # Comma-separated list of extra fields to display; empty string means none
        requestedFields = self.requestData["displayFields"]
        if requestedFields != "":
            displayFields = requestedFields.split(",")
        else:
            displayFields = list()

        # Exclude items already ordered for the patient from any recommended list
        query.excludeItemIds.update(recentItemIds)
        # If no specific query items specified, then use the recent patient item IDs
        if not query.queryItemIds:
            query.queryItemIds.update(recentItemIds)

        recommendedData = self.recommender(query)

        # Denormalize results with links to clinical item descriptions
        if len(recommendedData) > 0:
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version to display, but use original for header labels
        fieldHeaders, displayFieldsFormatSuffixed = self.prepareDisplayHeaders(displayFields)
        self.requestData["fieldHeaders"] = fieldHeaders

        # Format for HTML and add a control field for interaction with the data
        for resultRow in recommendedData:
            self.prepareResultRow(resultRow, displayFields)

        # Try organize by category
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(recommendedData)

        # Column order for each rendered row: controls, requested display fields, description
        colNames = ["controls"] + list(displayFieldsFormatSuffixed) + ["description"]

        renderedLines = []
        previousRow = None
        for resultRow in recommendedData:
            # A category is "new" on the first row or whenever it differs from the preceding row
            if previousRow is None:
                isNewCategory = True
            else:
                isNewCategory = (previousRow["category_description"] !=
                                 resultRow["category_description"])
            # Limit category display if many repeats: only show the header when the category changes
            showCategory = self.requestData["groupByCategory"] and isNewCategory
            if showCategory:
                renderedLines.append(CATEGORY_HEADER_TEMPLATE % resultRow)
            renderedLines.append(self.formatRowHTML(resultRow, colNames, showCategory))
            previousRow = resultRow
        self.requestData["dataRows"] = "\n".join(renderedLines)
Пример #3
0
    def action_default(self):
        """Look for related orders by association / recommender methods.

        Reads its parameters from self.requestData, runs an ItemAssociationRecommender
        query and stores the outcome back into self.requestData:
        "fieldHeaders" (from prepareDisplayHeaders) and "dataRows" (the result rows
        rendered as newline-joined HTML).
        """
        # Recommender instance to query, sharing the web data cache
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        # Default sort field when none is requested.
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        if self.requestData["sortField"] == "":
            self.requestData["sortField"] = "P-YatesChi2-NegLog"

        query = RecommenderQuery()
        query.parseParams(self.requestData)
        # Fall back on the recommender's default exclusions when the request specified none
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()

        # Comma-separated list of extra fields to display; empty string means none
        requestedFields = self.requestData["displayFields"]
        if requestedFields != "":
            displayFields = requestedFields.split(",")
        else:
            displayFields = list()

        recommendedData = self.recommender(query)

        # Denormalize results with links to clinical item descriptions
        if len(recommendedData) > 0:
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version to display, but use original for header labels
        fieldHeaders, displayFieldsFormatSuffixed = self.prepareDisplayHeaders(displayFields)
        self.requestData["fieldHeaders"] = fieldHeaders

        # Format for HTML and add a control field for interaction with the data
        for resultRow in recommendedData:
            self.prepareResultRow(resultRow, displayFields)

        # Try organize by category
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(recommendedData)

        # Column order for each rendered row: controls, requested display fields, description
        colNames = ["controls"] + list(displayFieldsFormatSuffixed) + ["description"]

        renderedLines = []
        previousRow = None
        for resultRow in recommendedData:
            # A category is "new" on the first row or whenever it differs from the preceding row
            if previousRow is None:
                isNewCategory = True
            else:
                isNewCategory = (previousRow["category_description"] !=
                                 resultRow["category_description"])
            # Limit category display if many repeats: only show the header when the category changes
            showCategory = self.requestData["groupByCategory"] and isNewCategory
            if showCategory:
                renderedLines.append(CATEGORY_HEADER_TEMPLATE % resultRow)
            renderedLines.append(self.formatRowHTML(resultRow, colNames, showCategory))
            previousRow = resultRow
        self.requestData["dataRows"] = "\n".join(renderedLines)
Пример #4
0
    def test_recommender(self):
        """Run the recommender against the mock test data and verify the ranked item IDs.

        A single RecommenderQuery object is reused and mutated from scenario to
        scenario, so any setting that is not explicitly reset (for example
        timeDeltaMax once it has been set) carries over into the next scenario.
        Only the order of the returned clinical_item_id values is specified as
        expected data.
        """

        def verify(expectedItemIds):
            # Build the expected single-column rows, then run the query and compare
            headers = ["clinical_item_id"]
            expectedData = [RowItemModel([itemId], headers) for itemId in expectedItemIds]
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # queryItemIds, excludeItemIds, categoryIds and timeDeltaMax are left at their defaults here.
        query = RecommenderQuery()
        # Just get top 3 ranks for simplicity
        query.limit = 3
        # Artificial constraint to focus only on test data
        query.maxRecommendedId = 0

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        verify([-3, -6, -5])

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = {-100}
        verify([-3, -6, -5])

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = {-100}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        verify([-6, -5])

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = {-100}
        query.excludeItemIds = {-6}
        query.excludeCategoryIds = {-1, -4, -5, -6}
        verify([-5])

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        verify([-4, -6])

        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        # Same items as the previous scenario, but the expected order is reversed under the time limit
        verify([-6, -4])

        log.debug("General query with category limit")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = set()
        query.excludeCategoryIds = {-2, -4, -5, -6}
        query.timeDeltaMax = DELTA_HOUR
        verify([-4])

        log.debug("General query with specific exclusion")
        query.queryItemIds = {-2, -5, -100}
        query.excludeItemIds = {-4, -3, -2}
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        verify([-6])
Пример #5
0
# Keep track of each guideline name set
# Group the item IDs found in resultsTable by admission diagnosis ID, and record every
# (admitDxId, sectionName, guidelineName) combination encountered along the way.
itemIdsByAdmitDxId = dict()
for admitDxId, sectionName, guidelineName, itemId, itemName, itemDescription, itemCount in resultsTable:
    itemIdsByAdmitDxId.setdefault(admitDxId, set()).add(itemId)
    admitDxIdSectionGuidelineNameTuples.add(
        (admitDxId, sectionName, guidelineName))

recommender = ItemAssociationRecommender()

# Extend each diagnosis' item set with the top recommender results for that diagnosis.
# dict.items() and sys.stderr.write() are used instead of iteritems() / "print >>" so the
# script behaves the same on Python 2 and Python 3.  Only the set values are mutated inside
# the loop, never the dictionary itself, so iterating over items() is safe.
for admitDxId, itemIds in itemIdsByAdmitDxId.items():
    # Progress report: item count before adding recommendations
    sys.stderr.write("%s %s\n" % (admitDxId, len(itemIds)))
    recQuery = RecommenderQuery()
    recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds()
    recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(
    )
    # Query on the admission diagnosis item alone
    recQuery.queryItemIds = [admitDxId]
    # Within one day
    recQuery.timeDeltaMax = timedelta(1)
    recQuery.countPrefix = "patient_"
    recQuery.limit = TOP_ITEM_COUNT

    # Top results by P-value
    recQuery.sortField = "P-YatesChi2-NegLog"
    results = recommender(recQuery)
    for result in results:
        itemIds.add(result["clinical_item_id"])

    # Progress report: item count after adding recommendations
    sys.stderr.write("%s %s\n" % (admitDxId, len(itemIds)))