def test_recommender_aggregation(self):
    # Check the top-ranked recommendations produced under each scoring
    # aggregation method, then once more with a value filter applied.
    query = RecommenderQuery()
    query.countPrefix = "patient_"
    query.queryItemIds = {-2, -5}
    # Left at their defaults: excludeItemIds, categoryIds, timeDeltaMax.
    # timeDeltaMax: If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.),
    # will count item associations that occurred within that time delta as
    # co-occurrent. If left blank, will just consider all items within a given
    # patient as co-occurrent.
    query.limit = 3  # Just get top 3 ranks for simplicity
    query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

    headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

    def build_rows(*value_lists):
        # One RowItemModel per list of values, all sharing the same headers.
        return [RowItemModel(list(values), headers) for values in value_lists]

    def verify(expected_rows):
        # Run the query in its current state and compare against expectations.
        actual_rows = self.recommender(query)
        self.assertEqualRecommendedData(expected_rows, actual_rows, query)

    # (aggregation method, expected row for item -4). None means "leave the
    # query's default (weighted) aggregation method untouched".
    scenarios = [
        (None, [-4, 0.3, 22.5]),
        ("unweighted", [-4, 0.32857, 24.64286]),
        ("SerialBayes", [-4, 0.89157, 66.867471]),
        # NaiveBayes value is without truncating negative values;
        # with truncation the row would be [-4, 0.8, 58.59707].
        ("NaiveBayes", [-4, 3.75, 281.25]),
    ]
    for method, item4_row in scenarios:
        if method is not None:
            query.aggregationMethod = method
        # The expected row for item -6 is the same under every method.
        verify(build_rows(item4_row, [-6, 0.16667, 7.142857]))

    # Apply value filter (aggregation method is still NaiveBayes at this point)
    query.fieldFilters["freqRatio>"] = 10.0
    verify(build_rows([-6, 0.16667, 7.142857]))
def test_recommender(self):
    # Run the recommender against the mock test data and verify the expected
    # item ranking and scores under a series of query configurations.
    query = RecommenderQuery()
    # Left at their defaults initially: queryItemIds, excludeItemIds,
    # categoryIds, timeDeltaMax.
    # timeDeltaMax: If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.),
    # will count item associations that occurred within that time delta as
    # co-occurrent. If left blank, will just consider all items within a given
    # patient as co-occurrent.
    query.sortField = "tf"
    # Go ahead and query for all since short list and can get expected
    # calculation results for all
    query.limit = 16
    query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

    headers = ["clinical_item_id", "score"]

    def verify(expected_pairs):
        # Turn (item id, score) pairs into expected rows, run the query in its
        # current state, and compare the two result lists.
        expected_rows = [
            RowItemModel([item_id, score], headers)
            for item_id, score in expected_pairs
        ]
        actual_rows = self.recommender(query)
        self.assertEqualRecommendedData(expected_rows, actual_rows, query)

    # Scores expected when ranking by general likelihood only.
    two_of_13 = 2.0 / 13
    one_of_13 = 1.0 / 13

    log.debug(
        "Query with no item key input, just return ranks by general likelihood then."
    )
    verify(
        [
            (-2, two_of_13),
            (-5, two_of_13),
            (-6, two_of_13),
            (-1, one_of_13),
            (-3, one_of_13),
            (-7, one_of_13),
            (-8, one_of_13),
            (-10, one_of_13),
            (-11, one_of_13),
            (-12, one_of_13),
            (-13, one_of_13),
            (-14, one_of_13),
            (-15, one_of_13),
        ]
    )

    log.debug(
        "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
    )
    query.queryItemIds = {-100}
    verify(
        [
            (-2, two_of_13),
            (-5, two_of_13),
            (-6, two_of_13),
            (-1, one_of_13),
            (-3, one_of_13),
            (-7, one_of_13),
            (-8, one_of_13),
            (-10, one_of_13),
            (-11, one_of_13),
            (-12, one_of_13),
            (-13, one_of_13),
            (-14, one_of_13),
            (-15, one_of_13),
        ]
    )

    log.debug("Query with category filter on recommended results.")
    query.queryItemIds = {-100}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    # Compared with the unfiltered list, items -2, -1, -3, -14 and -15 are
    # no longer expected.
    verify(
        [
            (-5, two_of_13),
            (-6, two_of_13),
            (-7, one_of_13),
            (-8, one_of_13),
            (-10, one_of_13),
            (-11, one_of_13),
            (-12, one_of_13),
            (-13, one_of_13),
        ]
    )

    log.debug(
        "Query with category filter and specific exclusion filter on recommended results."
    )
    query.queryItemIds = {-100}
    query.excludeItemIds = {-6, -10}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    # Same as the previous case minus the explicitly excluded items -6 and -10.
    verify(
        [
            (-5, two_of_13),
            (-7, one_of_13),
            (-8, one_of_13),
            (-11, one_of_13),
            (-12, one_of_13),
            (-13, one_of_13),
        ]
    )

    # Scores expected once query items are supplied, named by magnitude.
    tf_combined = (1.0 / 6) * (2.0 / 2) + (1.0 / 4) * (1.0 / 2)
    tf_high = (1.0 / 6) * (2.0 / 2)
    tf_mid = (1.0 / 4) * (1.0 / 2)
    tf_low = (1.0 / 6) * (1.0 / 2)

    log.debug(
        "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    # The query items -2 and -5 themselves are not expected among the results.
    verify(
        [
            (-6, tf_combined),
            (-3, tf_high),
            (-7, tf_high),
            (-8, tf_high),
            (-14, tf_mid),
            (-15, tf_mid),
            (-1, tf_low),
            (-10, tf_low),
            (-11, tf_low),
            (-12, tf_low),
            (-13, tf_low),
        ]
    )

    log.debug("General query with category limit")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = {-2, -4, -5, -6}
    # Items -6, -7, -8, -14 and -15 drop out relative to the general query.
    verify(
        [
            (-3, tf_high),
            (-1, tf_low),
            (-10, tf_low),
            (-11, tf_low),
            (-12, tf_low),
            (-13, tf_low),
        ]
    )

    log.debug("General query with specific exclusion")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = {-4, -3, -2}
    query.excludeCategoryIds = set()
    # Item -3 drops out relative to the general query.
    verify(
        [
            (-6, tf_combined),
            (-7, tf_high),
            (-8, tf_high),
            (-14, tf_mid),
            (-15, tf_mid),
            (-1, tf_low),
            (-10, tf_low),
            (-11, tf_low),
            (-12, tf_low),
            (-13, tf_low),
        ]
    )

    log.debug("General query, sort by TF*IDF lift.")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    query.sortField = "lift"
    # Expected lift = (13.0 / n) * tf score, with n = 2 for item -6 and
    # n = 1 for every other expected item.
    verify(
        [
            (-3, (13.0 / 1) * tf_high),
            (-7, (13.0 / 1) * tf_high),
            (-8, (13.0 / 1) * tf_high),
            (-6, (13.0 / 2) * tf_combined),
            (-14, (13.0 / 1) * tf_mid),
            (-15, (13.0 / 1) * tf_mid),
            (-1, (13.0 / 1) * tf_low),
            (-10, (13.0 / 1) * tf_low),
            (-11, (13.0 / 1) * tf_low),
            (-12, (13.0 / 1) * tf_low),
            (-13, (13.0 / 1) * tf_low),
        ]
    )
def test_dataCache(self):
    # Test that repeating queries with cache turned on will not result in extra DB queries.
    # The number of queries issued is tracked via self.recommender.dataManager.queryCount.
    query = RecommenderQuery()
    query.countPrefix = "patient_"
    query.queryItemIds = set([-2, -5])
    # Left at their defaults: excludeItemIds, categoryIds, timeDeltaMax.
    # timeDeltaMax: If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.),
    # will count item associations that occurred within that time delta as
    # co-occurrent. If left blank, will just consider all items within a given
    # patient as co-occurrent.
    query.limit = 3  # Just get top 3 ranks for simplicity
    query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

    # First query without cache
    self.recommender.dataManager.dataCache = None
    baselineData = self.recommender(query)
    baselineQueryCount = self.recommender.dataManager.queryCount

    # Redo query with cache
    self.recommender.dataManager.dataCache = dict()
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    self.assertEqualRecommendedData(baselineData, newData, query)  # Ensure getting same results
    self.assertNotEqual(baselineQueryCount, newQueryCount)  # Expect needed more queries since no prior cache
    baselineQueryCount = newQueryCount

    # Again, but should be no new query since have cached results last time
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    self.assertEqualRecommendedData(baselineData, newData, query)
    self.assertEqual(baselineQueryCount, newQueryCount)

    # Repeat multiple times, should still have no new query activity.
    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for _ in range(10):
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

    # Query for subset should still yield no new query
    query.queryItemIds = set([-2])
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    baselineData = newData  # New baseline for subset
    self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

    # Repeat query for subset
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    self.assertEqualRecommendedData(baselineData, newData, query)
    self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

    # Query for partial subset, partial new
    query.queryItemIds = set([-5, -6])
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    baselineData = newData  # New baseline for subset
    # Expect no new queries for subsets, because first query should have done mass-all query
    self.assertEqual(baselineQueryCount, newQueryCount)

    # Repeat for partial subset, no longer new.
    # Keep the baseline captured by the previous call: re-assigning it to the
    # fresh result here would make the comparison below trivially true.
    newData = self.recommender(query)
    newQueryCount = self.recommender.dataManager.queryCount
    self.assertEqualRecommendedData(baselineData, newData, query)
    self.assertEqual(baselineQueryCount, newQueryCount)
def test_recommender(self):
    # Run the recommender against the mock test data and verify the expected
    # ranking of recommended item ids under a series of query configurations.
    query = RecommenderQuery()
    # Left at their defaults initially: queryItemIds, excludeItemIds,
    # categoryIds, timeDeltaMax.
    # timeDeltaMax: If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.),
    # will count item associations that occurred within that time delta as
    # co-occurrent. If left blank, will just consider all items within a given
    # patient as co-occurrent.
    query.limit = 3  # Just get top 3 ranks for simplicity
    query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

    headers = ["clinical_item_id"]

    def verify(expected_ids):
        # Build one expected row per item id, run the query in its current
        # state, and compare the two result lists.
        expected_rows = [RowItemModel([item_id], headers) for item_id in expected_ids]
        actual_rows = self.recommender(query)
        self.assertEqualRecommendedData(expected_rows, actual_rows, query)

    log.debug(
        "Query with no item key input, just return ranks by general likelihood then."
    )
    verify([-3, -6, -5])

    log.debug(
        "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
    )
    query.queryItemIds = {-100}
    verify([-3, -6, -5])

    log.debug("Query with category filter on recommended results.")
    query.queryItemIds = {-100}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    verify([-6, -5])

    log.debug(
        "Query with category filter and specific exclusion filter on recommended results."
    )
    query.queryItemIds = {-100}
    query.excludeItemIds = {-6}
    query.excludeCategoryIds = {-1, -4, -5, -6}
    verify([-5])

    log.debug(
        "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    verify([-4, -6])

    log.debug(
        "General query but set a limit on time delta worth counting item associations"
    )
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = set()
    query.timeDeltaMax = DELTA_HOUR
    # With the time limit the expected order of -4 and -6 is reversed.
    verify([-6, -4])

    log.debug("General query with category limit")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = set()
    query.excludeCategoryIds = {-2, -4, -5, -6}
    query.timeDeltaMax = DELTA_HOUR
    verify([-4])

    log.debug("General query with specific exclusion")
    query.queryItemIds = {-2, -5, -100}
    query.excludeItemIds = {-4, -3, -2}
    query.excludeCategoryIds = set()
    query.timeDeltaMax = DELTA_HOUR
    verify([-6])