def assertEqualRecommendedData(self, expectedData, recommendedData, query):
    """Compare recommendation results against expected items, pair by pair.

    Don't necessarily care about the specific numbers that come out of the
    recommendations, but do care about consistency in rankings and relative
    order by the query.sortField.

    For each (expected, recommended) pair, in list order:
    - derived statistics named by the expected item's keys are populated on
      the recommended item via ItemAssociationRecommender.populateDerivedStats,
    - assertEqualDict is called on the pair with the key list ["clinical_item_id"],
    - every key present on the expected item is compared against the
      recommended item (floats to 5 decimal places, anything else exactly),
    - the recommended item's query.sortField value must be <= the value seen
      on the previous recommended item (descending order of scores).

    Finally the two lists must have the same length.

    expectedData: sequence of dict-like expected items.
    recommendedData: sequence of dict-like items produced by the recommender.
    query: object whose sortField attribute names the score to order by.
    """
    lastScore = None
    for expectedItem, recommendedItem in zip(expectedData, recommendedData):
        # Ensure derived statistics are populated to enable comparisons
        ItemAssociationRecommender.populateDerivedStats(recommendedItem, expectedItem.keys())
        self.assertEqualDict(expectedItem, recommendedItem, ["clinical_item_id"])

        # Iterate the mapping directly: dict.iterkeys() only exists on Python 2,
        # while plain iteration yields the keys on both Python 2 and 3.
        for key in expectedItem:
            # If specified, then verify the specific value
            expectedValue = expectedItem[key]
            if isinstance(expectedValue, float):
                # assertAlmostEqual is the supported spelling; the
                # assertAlmostEquals alias is deprecated and gone in Python 3.12
                self.assertAlmostEqual(expectedValue, recommendedItem[key], 5)
            else:
                self.assertEqual(expectedValue, recommendedItem[key])

        if lastScore is not None:
            # Verify descending order of scores
            self.assertTrue(recommendedItem[query.sortField] <= lastScore)
        lastScore = recommendedItem[query.sortField]

    # zip() stops at the shorter input, so a length mismatch must be checked explicitly
    self.assertEqual(len(expectedData), len(recommendedData))
def __init__(self):
    """Set up default request parameters, the request handler and the recommender instance."""
    BaseDynamicData.__init__(self)

    # (parameter name, default value) pairs, assigned in the order listed
    defaultParams = (
        ("queryItemIds", ""),
        ("targetItemIds", ""),
        ("excludeItemIds", ""),
        ("excludeCategoryIds", ""),
        ("timeDeltaMax", ""),
        ("sortField", "PPV"),
        ("sortReverse", "True"),
        ("resultCount", "10"),
        ("invertQuery", ""),
        ("showCounts", ""),
        ("countPrefix", ""),
        ("aggregationMethod", "weighted"),
        ("fieldHeaders", ""),
        ("dataRows", ""),
    )
    for paramName, defaultValue in defaultParams:
        self.requestData[paramName] = defaultValue

    # Requests carrying "resultCount" are routed to action_default
    handlerName = ItemRecommendationTable.action_default.__name__
    self.addHandler("resultCount", handlerName)

    # Instance to test on, sharing the web-level data cache
    self.recommender = ItemAssociationRecommender()
    dataManager = self.recommender.dataManager
    dataManager.dataCache = webDataCache
def assertEqualRecommendedDataStats(self, expectedData, recommendedData, headers):
    """Check that the actual score / stat values of the recommendations match.

    Both lists must have the same length. For every pair of items the derived
    statistics named in headers are populated on the recommended item, then
    each header's value is compared to 3 decimal places.
    """
    self.assertEqual(len(expectedData), len(recommendedData))

    for expected, recommended in zip(expectedData, recommendedData):
        # Make sure the recommended item has all fields of interest populated / calculated
        ItemAssociationRecommender.populateDerivedStats(recommended, headers)

        for header in headers:
            wanted = expected[header]
            actual = recommended[header]
            failureMessage = 'Dicts diff with key (%s). Verify = %s, Sample = %s' % (header, wanted, actual)
            self.assertAlmostEqual(wanted, actual, 3, failureMessage)
def test_tripleSequence_virtualItem(self):
    """Test outcome assessment when the target is a virtual item based on the
    presence of a triple (instead of double) sequence of items.

    Runs the analyzer against the mock test data twice, once through the
    AnalysisQuery object and once through the command-line interface, and
    verifies the same expected stats both times.
    """
    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = set([-22222])
    analysisQuery.baseCategoryId = -7
    analysisQuery.queryTimeSpan = timedelta(0, 86400)
    analysisQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.targetItemIds = set([-16])
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # Initial run without time limits on outcome measure
    colNames = ["patient_id", "outcome.-16", "score.-16"]
    expectedResults = [RowItemModel([-22222, +1, 0.14286], colNames)]
    analysisResults = self.analyzer(analysisQuery)
    self.assertEqualStatResults(expectedResults, analysisResults, colNames)

    # Redo but run through command-line interface
    argv = [
        "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
        "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender",
        '0,-22222', "-"
    ]
    originalStdout = sys.stdout
    sys.stdout = StringIO()  # Redirect stdout output to collect test results
    try:
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
    finally:
        # Always restore the real stdout, even if main() raises, so the
        # redirect does not leak into (and silence) subsequent tests.
        sys.stdout = originalStdout
    self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)
def isItemRecommendable(self, clinicalItemId, queryItemCountById, recQuery, categoryIdByItemId):
    """Report whether the clinical item could possibly appear in the recommendation list.

    If it cannot, there is no point in testing the recommender against it.
    The decision is delegated unchanged to
    ItemAssociationRecommender.isItemRecommendable.
    """
    recommendableCheck = ItemAssociationRecommender.isItemRecommendable
    recommendable = recommendableCheck(
        clinicalItemId,
        queryItemCountById,
        recQuery,
        categoryIdByItemId,
    )
    return recommendable
def test_recommenderAnalysis(self):
    """Run the analyzer against the mock test data and verify the expected rows.

    Specific scores are not of interest as long as the ranks are correct, so
    the score position of every expected row holds SENTINEL_ANY_FLOAT.
    Three scenarios are checked in turn: BaselineFrequencyRecommender,
    ItemAssociationRecommender, and ItemAssociationRecommender with
    queryItemMax limited to 2.
    """
    query = AnalysisQuery()
    query.patientIds = set([-11111])
    query.recommender = BaselineFrequencyRecommender()
    query.baseRecQuery = RecommenderQuery()
    query.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # Not referenced below; presumably documents the column order of the expected tuples
    colNames = [
        "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank",
        "recScore"
    ]

    def runAndVerify(expectedRows):
        # Run the analyzer on the current query state and compare the tables
        actualRows = self.analyzer(query)
        self.assertEqualTable(expectedRows, actualRows, 3)

    # Start with default recommender (trailing comments note previously observed scores)
    runAndVerify([
        (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.170
        (-11111, -10, 1, 1, 4, SENTINEL_ANY_FLOAT),  # 0.032
        (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  # 0.025
        (-11111, -12, 4, 3, 2, SENTINEL_ANY_FLOAT),  # 0.053
    ])

    # Now try targeted recommender
    query.recommender = ItemAssociationRecommender()
    runAndVerify([
        (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.167
        (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),  # 0.304
        (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),  # 0.190
        (-11111, -12, 4, 3, 1, SENTINEL_ANY_FLOAT),  # 0.444
    ])

    # Repeat, but put a limit on maximum number of query items and recommendations we want analyzed
    query.queryItemMax = 2
    runAndVerify([
        (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),  # 0.167
        (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),  # 0.304
    ])
# Build a lookup from clinical_item_id to description, then walk the list of
# diagnoses to test and prepare one RecommenderQuery per diagnosis.
print("Creating clinical_item_id to description map")
id2description = {}
# Use a context manager so the file handle is closed once the map is built
# (the handle was previously left open for the life of the process).
with open('/Users/jwang/Desktop/Results/clinical_items.csv', "rU") as clinical_items:
    clinical_items.readline()  # Discard the first line (presumably a header row)
    for line in clinical_items:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        # Any further comma-separated pieces are rejoined with spaces
        description = " ".join(line[1:])
        id2description[clinical_item_id] = description

# Query-string suffix shared by every diagnosis; only queryItemIds varies
baseQueryStr = "&targetItemIds=&excludeItemIds=71052,71046,71054,71083,71045,71047&excludeCategoryIds=1,58,4,2,160,161,59,13,159,163,23,62,18,11,46,2&timeDeltaMax=86400&sortField=P-YatesChi2-NegLog&sortReverse=True&filterField1=prevalence<:&filterField2=PPV<:&filterField3=RR<:&filterField4=sensitivity<:&filterField5=P-YatesChi2<:&resultCount=4000&invertQuery=false&showCounts=true&countPrefix=patient_&aggregationMethod=weighted&cacheTime=0"
recommender = ItemAssociationRecommender()
diagnosis_count = 0

# Reopen diagnoses, from the top of the file
with open('/Users/jwang/Desktop/Results/diagnoses_to_test.csv', "rU") as diagnoses:
    diagnoses.readline()  # Discard the first line (presumably a header row)
    for line in diagnoses:
        line = line.strip().split(",")
        clinical_item_id = line[0]
        description = " ".join(line[1:])
        queryStr = "queryItemIds=" + str(clinical_item_id) + baseQueryStr
        print('Finding Top Associations for "{0}"'.format(description))

        # Build RecommenderQuery
        query = RecommenderQuery()
        paramDict = dict(urlparse.parse_qsl(queryStr, True))
        query.parseParams(paramDict)

        # Call ItemRecommender
class RelatedOrders(BaseDynamicData):
    """Simple script to (dynamically) relay query and result data from the
    ItemRecommendation module in URL request then HTML table format.
    """

    def __init__(self):
        """Populate default request parameters and register the action handlers."""
        BaseDynamicData.__init__(self)
        self.requestData["searchStr"] = ""
        self.requestData["analysisStatus"] = "1"
        self.requestData["sim_patient_id"] = ""
        self.requestData["sim_time"] = ""
        # Default comma-separated list of source tables to expect orders to reside in
        self.requestData["sourceTables"] = "stride_order_proc,stride_order_med"
        self.requestData["queryItemIds"] = ""
        self.requestData["targetItemIds"] = ""
        self.requestData["excludeItemIds"] = ""
        self.requestData["excludeCategoryIds"] = ""
        self.requestData["timeDeltaMax"] = "86400"  # Look for recommendations likely within 24 hours
        self.requestData["sortField"] = ""
        self.requestData["enableRecommender"] = "True"  # By default, assume recommender is enabled
        self.requestData["displayFields"] = ""  # "prevalence","PPV","RR","P-YatesChi2"
        self.requestData["sortReverse"] = "True"
        self.requestData["nPreCols"] = "1"
        self.requestData["groupByCategory"] = "True"
        self.requestData["resultCount"] = "10"  # Default for related order search
        self.requestData["invertQuery"] = ""
        self.requestData["showCounts"] = ""
        self.requestData["countPrefix"] = "patient_"
        self.requestData["aggregationMethod"] = "weighted"
        self.requestData["title"] = "Order Search Results"
        self.requestData["fieldHeaders"] = ""
        # Placeholder row (loading spinner) shown until an action fills in real rows
        self.requestData["dataRows"] = '<tr><td colspan=100 align=center height=200><img src="../../resource/ajax-loader.gif"></td></tr>'

        self.addHandler("searchStr", RelatedOrders.action_orderSearch.__name__)
        self.addHandler("RelatedOrders", RelatedOrders.action_default.__name__)

    def _isRequestFlagEnabled(self, paramName):
        """Interpret the named request parameter as a boolean flag.

        Request values are strings, so a plain truthiness test would treat
        "False" as enabled. Empty values, and values whose lower-cased text is
        listed in FALSE_STRINGS, count as disabled; anything else is enabled.
        """
        value = self.requestData[paramName]
        if not value:
            return False
        return str(value).lower() not in FALSE_STRINGS

    def _renderRowsHTML(self, dataModels, colNames):
        """Render the data models as HTML table rows joined by newlines.

        When category grouping is enabled, a CATEGORY_HEADER_TEMPLATE row is
        emitted each time the category_description differs from the previous
        row's.
        """
        groupByCategory = self._isRequestFlagEnabled("groupByCategory")
        lastModel = None
        htmlLines = list()
        for dataModel in dataModels:
            newCategory = (lastModel is None or
                           lastModel["category_description"] != dataModel["category_description"])
            showCategory = (groupByCategory and newCategory)  # Limit category display if many repeats
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
            htmlLines.append(self.formatRowHTML(dataModel, colNames, showCategory))
            lastModel = dataModel
        return str.join("\n", htmlLines)

    def action_orderSearch(self):
        """Search for orders by query string and store the rendered rows in requestData["dataRows"]."""
        manager = SimManager()
        query = ClinicalItemQuery()
        query.parseParams(self.requestData)
        query.sourceTables = self.requestData["sourceTables"].split(",")
        results = manager.clinicalItemSearch(query)

        # Decorate each result with the fields the row templates expect
        lastModel = None
        for dataModel in results:
            dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
            dataModel["nPreCols"] = self.requestData["nPreCols"]
            dataModel["category_description.format"] = ""
            if lastModel is None or lastModel["category_description"] != dataModel["category_description"]:
                # Only show category if new
                dataModel["category_description.format"] = "<b>%s</b>" % dataModel["category_description"]
            lastModel = dataModel

        colNames = ["controls", "description"]  # "name" for order code. ,"category_description.format"
        self.requestData["dataRows"] = self._renderRowsHTML(results, colNames)

    def action_default(self):
        """Look for related orders by association / recommender methods.

        Results are rendered to HTML and stored in requestData["fieldHeaders"]
        and requestData["dataRows"].
        """
        # If patient is specified then modify query and exclusion list based on items already ordered for patient
        recentItemIds = set()
        if self.requestData["sim_patient_id"]:
            patientId = int(self.requestData["sim_patient_id"])
            simTime = int(self.requestData["sim_time"])
            # Track recent item IDs (orders, diagnoses, unlocked results, etc.
            # that related order queries will be based off of)
            manager = SimManager()
            recentItemIds = manager.recentItemIds(patientId, simTime)

        # Recommender Instance to test on
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache  # Allow caching of data for rapid successive queries

        query = RecommenderQuery()
        if self.requestData["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(self.requestData)
        if len(query.excludeItemIds) == 0:
            # Copy the defaults: the set is extended with patient-specific IDs
            # below, and the recommender may hand back a shared / cached
            # collection (TODO confirm) that must not accumulate them.
            query.excludeItemIds = set(self.recommender.defaultExcludedClinicalItemIds())
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()

        displayFields = list()
        if self.requestData["displayFields"] != "":
            displayFields = self.requestData["displayFields"].split(",")

        # Exclude items already ordered for the patient from any recommended list
        query.excludeItemIds.update(recentItemIds)
        if not query.queryItemIds:
            # If no specific query items specified, then use the recent patient item IDs
            query.queryItemIds.update(recentItemIds)

        recommendedData = self.recommender(query)

        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version to display,
        # but use original for header labels
        (self.requestData["fieldHeaders"], displayFieldsFormatSuffixed) = self.prepareDisplayHeaders(displayFields)

        # Format for HTML and add a control field for interaction with the data
        for dataModel in recommendedData:
            self.prepareResultRow(dataModel, displayFields)

        # Try organize by category
        if self._isRequestFlagEnabled("groupByCategory"):
            recommendedData = self.recommender.organizeByCategory(recommendedData)

        colNames = ["controls"]  # "name" for code. ,"category_description"
        colNames.extend(displayFieldsFormatSuffixed)
        colNames.extend(["description"])
        self.requestData["dataRows"] = self._renderRowsHTML(recommendedData, colNames)

    def prepareDisplayHeaders(self, displayFields):
        """Build the header cells and the matching list of formatted column names.

        Returns a 2-tuple (fieldHeadersHTML, displayFieldsFormatSuffixed):
        the <th> cells for the display fields, and each field name with a
        "Format" suffix. When the showCounts request parameter is enabled,
        the CORE_FIELDS are appended to both.
        """
        showCounts = (self.requestData["showCounts"].lower() not in FALSE_STRINGS)

        # NOTE(review): display field names come from the request and are
        # inserted into the HTML unescaped - confirm whether escaping is needed.
        fieldHeadersHTML = "".join('<th nowrap>' + displayField + '</th>' for displayField in displayFields)
        if showCounts:
            fieldHeadersHTML += '<th>' + str.join('</th><th>', CORE_FIELDS) + '</th>'

        displayFieldsFormatSuffixed = ['%sFormat' % field for field in displayFields]
        if showCounts:
            displayFieldsFormatSuffixed.extend('%sFormat' % field for field in CORE_FIELDS)

        return (fieldHeadersHTML, displayFieldsFormatSuffixed)

    def prepareResultRow(self, dataModel, displayFields):
        """Add control and display-formatted fields to a result row, in place.

        Sets "controls", "nPreCols" and a comma-free "name". When the row has
        an "nB" count, missing display fields are calculated from
        ContingencyStats and a "<field>Format" string is stored for each
        display field outside CORE_FIELDS and for every CORE_FIELDS entry.
        """
        dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
        # Track spacer columns leading up to order description. +1 for control column
        dataModel["nPreCols"] = len(displayFields) + 1
        dataModel["name"] = dataModel["name"].replace(",", "-")

        if "nB" in dataModel:
            if "nAB" not in dataModel:
                # Baseline query without query items, use matching numbers to
                # ensure calculations will have something to process
                dataModel["nAB"] = dataModel["nB"]
                dataModel["nA"] = dataModel["N"]

            nAB = dataModel["nAB"]
            nA = dataModel["nA"]
            nB = dataModel["nB"]
            N = dataModel["N"]
            contStats = ContingencyStats(nAB, nA, nB, N)
            contStats.normalize(truncateNegativeValues=False)

            for field in displayFields:
                if field not in dataModel:
                    # Unavailable field, see if it is a derived field that can be calculated
                    dataModel[field] = contStats[field]

                formatKey = "%sFormat" % field
                if field in CORE_FIELDS:
                    pass  # Count fields are formatted in the loop below
                elif field in PERCENT_FIELDS:
                    # Format as a percentage
                    # NOTE(review): "%d" truncates rather than rounds - confirm that is intended
                    dataModel[formatKey] = "%d%%" % (dataModel[field] * 100)
                elif abs(dataModel[field]) < 0.01:
                    # Allow formatting for very small values
                    dataModel[formatKey] = "%.0e" % dataModel[field]
                elif abs(dataModel[field]) < 1:
                    # Smaller value, show more significant digits
                    dataModel[formatKey] = "%.2f" % dataModel[field]
                else:
                    # Default just format as limited floating point values
                    dataModel[formatKey] = "%.1f" % dataModel[field]

            for field in CORE_FIELDS:
                formatKey = "%sFormat" % field
                # Count fields express as integers, assuming available at all
                if field in BASELINE_FIELDS:
                    dataModel[formatKey] = "%d" % dataModel[field]
                elif dataModel[field] > 10:
                    # May have small virtual counts from derived scenarios
                    dataModel[formatKey] = "%.1f" % dataModel[field]
                else:
                    dataModel[formatKey] = "%.2f" % dataModel[field]

    def formatRowHTML(self, dataModel, colNames, showCategory=True):
        """Specific formatting for row data elements.

        Returns one <tr> for dataModel with a cell per entry of colNames.
        "category_description" cells are blanked unless showCategory is true;
        "description" cells get the description template plus, when the
        recommender is enabled, the related-orders link; every other column
        is emitted right-aligned from dataModel[col].
        """
        htmlList = list()
        htmlList.append('<tr valign=top>')
        for col in colNames:
            if col == "category_description":
                # Blank out repeat categories
                if showCategory:
                    htmlList.append('<td align=center><b>%(category_description)s</b></td>' % dataModel)
                else:
                    htmlList.append('<td></td>')
            elif col == "description":
                htmlList.append('<td align=left>')
                htmlList.append(DESCRIPTION_TEMPLATE % dataModel)
                # Only include related link if recommender is enabled
                if self.requestData['enableRecommender'] == "True":
                    htmlList.append(RELATED_LINK_TEMPLATE % dataModel)
                htmlList.append('</td>')
            else:
                htmlList.append('<td align=right>%s</td>' % dataModel[col])
        htmlList.append('</tr>')
        return str.join("\n", htmlList)
def action_default(self):
    """Look for related orders by association / recommender methods.

    Builds a RecommenderQuery from the request data, runs the recommender and
    stores the rendered HTML in requestData["fieldHeaders"] and
    requestData["dataRows"].
    """
    # If patient is specified then modify query and exclusion list based on items already ordered for patient
    recentItemIds = set()
    if self.requestData["sim_patient_id"]:
        patientId = int(self.requestData["sim_patient_id"])
        simTime = int(self.requestData["sim_time"])
        # Track recent item IDs (orders, diagnoses, unlocked results, etc.
        # that related order queries will be based off of)
        manager = SimManager()
        recentItemIds = manager.recentItemIds(patientId, simTime)

    # Recommender Instance to test on
    self.recommender = ItemAssociationRecommender()
    self.recommender.dataManager.dataCache = webDataCache  # Allow caching of data for rapid successive queries

    query = RecommenderQuery()
    if self.requestData["sortField"] == "":
        # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
        self.requestData["sortField"] = "P-YatesChi2-NegLog"
    query.parseParams(self.requestData)
    if len(query.excludeItemIds) == 0:
        # Copy the defaults: the set is extended with patient-specific IDs
        # below, and the recommender may hand back a shared / cached
        # collection (TODO confirm) that must not accumulate them.
        query.excludeItemIds = set(self.recommender.defaultExcludedClinicalItemIds())
    if len(query.excludeCategoryIds) == 0:
        query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()

    displayFields = list()
    if self.requestData["displayFields"] != "":
        displayFields = self.requestData["displayFields"].split(",")

    # Exclude items already ordered for the patient from any recommended list
    query.excludeItemIds.update(recentItemIds)
    if not query.queryItemIds:
        # If no specific query items specified, then use the recent patient item IDs
        query.queryItemIds.update(recentItemIds)

    recommendedData = self.recommender(query)

    if len(recommendedData) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(recommendedData)

    # Display fields should append Format suffix to identify which version to display,
    # but use original for header labels
    (self.requestData["fieldHeaders"], displayFieldsFormatSuffixed) = self.prepareDisplayHeaders(displayFields)

    # Format for HTML and add a control field for interaction with the data
    for dataModel in recommendedData:
        self.prepareResultRow(dataModel, displayFields)

    # NOTE(review): this is a plain truthiness test on a request value, so any
    # non-empty text (including "False") enables grouping - confirm intent.
    groupByCategory = self.requestData["groupByCategory"]

    # Try organize by category
    if groupByCategory:
        recommendedData = self.recommender.organizeByCategory(recommendedData)

    colNames = ["controls"]  # "name" for code. ,"category_description"
    colNames.extend(displayFieldsFormatSuffixed)
    colNames.extend(["description"])

    lastModel = None
    htmlLines = list()
    for dataModel in recommendedData:
        newCategory = (lastModel is None or
                       lastModel["category_description"] != dataModel["category_description"])
        showCategory = (groupByCategory and newCategory)  # Limit category display if many repeats
        if showCategory:
            htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
        htmlLines.append(self.formatRowHTML(dataModel, colNames, showCategory))
        lastModel = dataModel
    self.requestData["dataRows"] = str.join("\n", htmlLines)
def test_recommenderAnalysis(self): # Run the recommender against the mock test data above and verify expected stats afterwards. analysisQuery = AnalysisQuery(); analysisQuery.patientIds = set([-11111]); analysisQuery.numQueryItems = 1; analysisQuery.numVerifyItems = 3; analysisQuery.numRecommendations = 4; analysisQuery.recommender = BaselineFrequencyRecommender(); #analysisQuery.recommender = ItemAssociationRecommender(); analysisQuery.baseRecQuery = RecommenderQuery(); analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data # Don't use items whose default is to be excluded from recommendations analysisQuery.baseRecQuery.excludeCategoryIds = analysisQuery.recommender.defaultExcludedClinicalItemCategoryIds(); analysisQuery.baseRecQuery.excludeItemIds = analysisQuery.recommender.defaultExcludedClinicalItemIds(); #recQuery.timeDeltaMax = timedelta(0, int(self.requestData["timeDeltaMax"]) ); # Time delta to use for queries, otherwise just default to all times colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall","weightPrecision", "normalRecall","normalPrecision", "ROC-AUC"]; # Start with default recommender expectedResults = [ RowItemModel([-11111, 1,2,3, 0.333, 0.25, 0.286, 0.208, 0.254, 0.333/1.0, 0.25/0.75, 0.524], colNames ) ]; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line interface sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","BaselineFrequencyRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"]; self.preparer.main(argv); 
preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","BaselineFrequencyRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Now try targeted recommender analysisQuery.recommender = ItemAssociationRecommender(); expectedResults = [ RowItemModel([-11111, 1,2,3, 0.333, 0.25, 0.286, 0.347, 0.293, 0.333, 0.25/0.75, 0.6666], colNames ) ]; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Now try multiple query items targeted recommender analysisQuery.numQueryItems = 2; expectedResults = [ RowItemModel([-11111, 1, 2, 3, 0.333, 0.25, 0.286, 0.254, 0.194, 0.333, 0.25/0.75, 0.4167], colNames ) ]; analysisResults = self.analyzer(analysisQuery); 
self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","2","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","2","-v","3",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # More query items with aggregation options analysisQuery.numQueryItems = 3; expectedResults = [ RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.5, 0.25/0.5, 0.4166], colNames ) ]; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read 
prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Value filters analysisQuery.baseRecQuery.sortField= "freqRatio"; analysisQuery.baseRecQuery.fieldFilters["freqRatio>"] = 70; expectedResults = [ RowItemModel([-11111, 2, 0, 2, 1.0, 0.5, 0.6666, 1.0, 0.446, 1.0, 0.5/0.5, 0.375], colNames ) ]; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); del analysisQuery.baseRecQuery.fieldFilters["freqRatio>"]; # Undo to not affect subsequent queries # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-f","freqRatio>:70.0","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-s","freqRatio","-f","freqRatio>:70.0",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Unweighted aggregation analysisQuery.baseRecQuery.weightingMethod = "unweighted"; expectedResults = [ RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.3333, 
0.517, 0.194, 0.5, 0.25/0.5, 0.25], colNames ) ]; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Run by equivalent query time span selection rather than explicit counts colNames = ["patient_id", "baseItemId", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"]; expectedResults = [ RowItemModel([-11111, -4, 1, 1, 3, 0.5, 0.25, 0.333, 0.517, 0.194, 0.4167], colNames ) ]; analysisQuery.baseRecQuery.sortField= "conditionalFreq"; analysisQuery.numQueryItems = None; analysisQuery.numVerifyItems = None; analysisQuery.baseCategoryId = -1; analysisQuery.queryTimeSpan = timedelta(0,3*60*60); analysisQuery.verifyTimeSpan = timedelta(50,0); analysisQuery.numRecommendations = 4; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = 
StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-c","-1","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-c","-1","-Q","5400","-V","4320000",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Run by query time span by identifying base clinical item, rather than a general category analysisQuery.numQueryItems = None; analysisQuery.numVerifyItems = None; analysisQuery.baseCategoryId = None; # Clear prior setting analysisQuery.baseItemId = -4; analysisQuery.queryTimeSpan = timedelta(0,3*60*60); analysisQuery.verifyTimeSpan = timedelta(50,0); analysisQuery.numRecommendations = 4; analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-b","-4","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = 
["PreparePatientItems.py","-b","-4","-Q","5400","-V","4320000",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Basic then Filter test data date range colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"]; expectedResults = [ RowItemModel([-11111, 1, 1, 3, 0.5, 0.25, 0.33333, 0.4375, 0.29319, 0.66667], colNames ) ]; analysisQuery = AnalysisQuery(); analysisQuery.patientIds = set([-11111]); analysisQuery.numQueryItems = 1; analysisQuery.numVerifyItems = 2; analysisQuery.numRecommendations = 4; analysisQuery.recommender = ItemAssociationRecommender(); analysisQuery.baseRecQuery = RecommenderQuery(); analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","1","-v","2",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = 
["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Date Filters colNames = ["patient_id", "TP", "FN", "FP", "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"]; expectedResults = [ RowItemModel([-11111, 0, 1, 2, 0.0, 0.0, 0.0, 0.0, 0.0, None], colNames ) ]; analysisQuery = AnalysisQuery(); analysisQuery.patientIds = set([-11111]); analysisQuery.numQueryItems = 1; analysisQuery.numVerifyItems = 2; analysisQuery.numRecommendations = 4; analysisQuery.recommender = ItemAssociationRecommender(); analysisQuery.baseRecQuery = RecommenderQuery(); analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data analysisQuery.startDate = datetime(2000,1,1,1); analysisQuery.endDate = datetime(2000,1,10); analysisResults = self.analyzer(analysisQuery); self.assertEqualStatResults(expectedResults, analysisResults, colNames); # Redo with command-line sys.stdout = StringIO(); # Redirect stdout output to collect test results argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-S","2000-01-01 01:00:00","-E","2000-01-10","-R","ItemAssociationRecommender",'0,-11111',"-"]; self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames); # Redo through prepared file intermediary sys.stdout = StringIO(); argv = ["PreparePatientItems.py","-q","1","-v","2","-S","2000-01-01 01:00:00","-E","2000-01-10",'0,-11111',"-"]; self.preparer.main(argv); preparedDataFile = StringIO(sys.stdout.getvalue()); sys.stdin = preparedDataFile; # Read prepared data file from redirected stdin sys.stdout = StringIO(); argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"]; 
self.analyzer.main(argv); textOutput = StringIO(sys.stdout.getvalue()); self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);
def test_recommenderAnalysis(self):
    """Verify outcome / score stats produced by the analyzer on the mock test data.

    Three scenarios are covered (no time limit on the outcome measure, a
    1 week limit, and a 2 day limit).  Each scenario is checked three ways:
    calling the analyzer directly with an AnalysisQuery, running the
    analyzer command-line interface, and running the analyzer on a
    prepared data file generated by the preparer and fed in through stdin.
    """
    colNames = [
        "patient_id",
        "outcome.-33", "score.-33",
        "outcome.-32", "score.-32",
        "outcome.-31", "score.-31",
        "outcome.-30", "score.-30",
    ]

    analysisQuery = AnalysisQuery()
    analysisQuery.patientIds = set([-11111])
    analysisQuery.baseCategoryId = -7
    analysisQuery.queryTimeSpan = timedelta(0, 86400)
    # Alternative previously tried here: BaselineFrequencyRecommender()
    analysisQuery.recommender = ItemAssociationRecommender()
    analysisQuery.baseRecQuery = RecommenderQuery()
    analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
    analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

    # The command-line variants replace sys.stdout / sys.stdin to capture
    # and feed data.  Remember the real streams so they are always put back,
    # even when an assertion fails part way through.
    originalStdin = sys.stdin
    originalStdout = sys.stdout
    try:
        # Initial run without time limits on outcome measure
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222], colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400",
            "-o", "-33,-32,-31,-30", "-m", "0",
            "-R", "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())
        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0",
            "-R", "ItemAssociationRecommender", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Now try with time limitation on outcome measure
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)  # 1 week
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222], colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400",
            "-t", "604800", "-o", "-33,-32,-31,-30", "-m", "0",
            "-R", "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())
        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0",
            "-R", "ItemAssociationRecommender", "-t", "604800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Again, but with much stricter time limit (negative test case)
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 172800)  # 2 day
        expectedResults = [
            RowItemModel([-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109], colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400",
            "-t", "172800", "-o", "-33,-32,-31,-30", "-m", "0",
            "-R", "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)

        # Redo through prepared file intermediary
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "172800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())
        sys.stdin = preparedDataFile  # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0",
            "-R", "ItemAssociationRecommender", "-t", "172800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames)
    finally:
        sys.stdin = originalStdin
        sys.stdout = originalStdout
def setUp(self):
    """Build the clinical item schema and load the mock records the tests query.

    Inserts categories, clinical items, patient items and item associations
    (all with negative ids), clears the two DataManager cache entries that
    depend on that data, and creates the recommender instance under test.
    """
    DBTestCase.setUp(self)

    from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
    ClinicalItemDataLoader.build_clinical_item_psql_schemata()

    log.info("Populate the database with test data")

    # --- Categories: keep the stored ids (as strings) on the instance ---
    self.clinicalItemCategoryIdStrList = list()
    categoryHeaders = ["clinical_item_category_id", "source_table"]
    categoryRows = [
        [-1, "Labs"],
        [-2, "Imaging"],
        [-3, "Meds"],
        [-4, "Nursing"],
        [-5, "Problems"],
        [-6, "Lab Results"],
    ]
    categoryModels = [RowItemModel(row, categoryHeaders) for row in categoryRows]
    for categoryModel in categoryModels:
        categoryId, _ = DBUtil.findOrInsertItem("clinical_item_category", categoryModel)
        self.clinicalItemCategoryIdStrList.append(str(categoryId))

    # --- Clinical items ---
    itemHeaders = ["clinical_item_id", "clinical_item_category_id", "name"]
    itemRows = [
        [-1, -1, "CBC"],
        [-2, -1, "BMP"],
        [-3, -1, "Hepatic Panel"],
        [-4, -1, "Cardiac Enzymes"],
        [-5, -2, "CXR"],
        [-6, -2, "RUQ Ultrasound"],
        [-7, -2, "CT Abdomen/Pelvis"],
        [-8, -2, "CT PE Thorax"],
        [-9, -3, "Acetaminophen"],
        [-10, -3, "Carvedilol"],
        [-11, -3, "Enoxaparin"],
        [-12, -3, "Warfarin"],
        [-13, -3, "Ceftriaxone"],
        [-14, -4, "Foley Catheter"],
        [-15, -4, "Strict I&O"],
        [-16, -4, "Fall Precautions"],
    ]
    itemModels = [RowItemModel(row, itemHeaders) for row in itemRows]
    for itemModel in itemModels:
        DBUtil.findOrInsertItem("clinical_item", itemModel)

    # --- Patient items (every row shares the same analyze_date) ---
    patientItemHeaders = [
        "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"
    ]
    analyzeDate = datetime(2010, 1, 1, 0)
    patientItemRows = [
        [-1, -11111, -4, datetime(2000, 1, 1, 0), analyzeDate],
        [-2, -11111, -10, datetime(2000, 1, 1, 0), analyzeDate],
        [-3, -11111, -8, datetime(2000, 1, 1, 2), analyzeDate],
        [-4, -11111, -10, datetime(2000, 1, 2, 0), analyzeDate],
        [-5, -11111, -12, datetime(2000, 2, 1, 0), analyzeDate],
        [-10, -22222, -7, datetime(2000, 1, 5, 0), analyzeDate],
        [-12, -22222, -6, datetime(2000, 1, 9, 0), analyzeDate],
        [-13, -22222, -11, datetime(2000, 1, 9, 0), analyzeDate],
        [-14, -33333, -6, datetime(2000, 2, 9, 0), analyzeDate],
        [-15, -33333, -2, datetime(2000, 2, 11, 0), analyzeDate],
    ]
    patientItemModels = [RowItemModel(row, patientItemHeaders) for row in patientItemRows]
    for patientItemModel in patientItemModels:
        DBUtil.findOrInsertItem("patient_item", patientItemModel)

    # --- Item associations ---
    associationHeaders = [
        "clinical_item_id", "subsequent_item_id",
        "patient_count_0", "patient_count_3600", "patient_count_86400",
        "patient_count_604800", "patient_count_any",
        "time_diff_sum", "time_diff_sum_squares",
    ]
    associationRows = [
        [-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0],
        [-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0],
        [-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0],
        [-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0],
        [-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0],
        [-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0],
        # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
        [-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0],
        [-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0],
        [-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0],
        [-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0],
        [-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0],
        [-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0],
        [-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0],
        [-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0],
        [-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0],
        [-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0],
    ]
    associationModels = [RowItemModel(row, associationHeaders) for row in associationRows]

    # Time-interval suffixes ("0", "3600", ..., "any") of the patient_count_* columns
    countPrefix = "patient_count_"
    timeSuffixes = [
        header[len(countPrefix):]
        for header in associationHeaders
        if header.startswith(countPrefix)
    ]
    for associationModel in associationModels:
        # Add non patient_count variations: same value, plus 5 when the
        # value is >5 and the column is not the zero time interval
        for timeStr in timeSuffixes:
            patientCount = associationModel[countPrefix + timeStr]
            if timeStr != "0" and patientCount > 5:
                associationModel["count_%s" % timeStr] = patientCount + 5
            else:
                associationModel["count_%s" % timeStr] = patientCount
        DBUtil.findOrInsertItem("clinical_item_association", associationModel)

    # Indicate that cache data needs to be updated
    self.dataManager = DataManager()
    for cacheKey in ("analyzedPatientCount", "clinicalItemCountsUpdated"):
        self.dataManager.clearCacheData(cacheKey)

    self.recommender = ItemAssociationRecommender()
class TestItemRecommender(DBTestCase):
    """Database-backed tests of ItemAssociationRecommender against mock association data.

    setUp loads a small set of categories, clinical items, patient items and
    item associations (all with negative ids); tearDown deletes them again.
    """

    def setUp(self):
        """Prepare state for test cases"""
        DBTestCase.setUp(self)

        from stride.clinical_item.ClinicalItemDataLoader import ClinicalItemDataLoader
        ClinicalItemDataLoader.build_clinical_item_psql_schemata()

        log.info("Populate the database with test data")

        # Category ids are remembered (as strings) so tearDown can delete exactly these rows
        self.clinicalItemCategoryIdStrList = list()
        headers = ["clinical_item_category_id", "source_table"]
        dataModels = [
            RowItemModel([-1, "Labs"], headers),
            RowItemModel([-2, "Imaging"], headers),
            RowItemModel([-3, "Meds"], headers),
            RowItemModel([-4, "Nursing"], headers),
            RowItemModel([-5, "Problems"], headers),
            RowItemModel([-6, "Lab Results"], headers),
        ]
        for dataModel in dataModels:
            (dataItemId, isNew) = DBUtil.findOrInsertItem("clinical_item_category", dataModel)
            self.clinicalItemCategoryIdStrList.append(str(dataItemId))

        headers = ["clinical_item_id", "clinical_item_category_id", "name"]
        dataModels = [
            RowItemModel([-1, -1, "CBC"], headers),
            RowItemModel([-2, -1, "BMP"], headers),
            RowItemModel([-3, -1, "Hepatic Panel"], headers),
            RowItemModel([-4, -1, "Cardiac Enzymes"], headers),
            RowItemModel([-5, -2, "CXR"], headers),
            RowItemModel([-6, -2, "RUQ Ultrasound"], headers),
            RowItemModel([-7, -2, "CT Abdomen/Pelvis"], headers),
            RowItemModel([-8, -2, "CT PE Thorax"], headers),
            RowItemModel([-9, -3, "Acetaminophen"], headers),
            RowItemModel([-10, -3, "Carvedilol"], headers),
            RowItemModel([-11, -3, "Enoxaparin"], headers),
            RowItemModel([-12, -3, "Warfarin"], headers),
            RowItemModel([-13, -3, "Ceftriaxone"], headers),
            RowItemModel([-14, -4, "Foley Catheter"], headers),
            RowItemModel([-15, -4, "Strict I&O"], headers),
            RowItemModel([-16, -4, "Fall Precautions"], headers),
        ]
        for dataModel in dataModels:
            DBUtil.findOrInsertItem("clinical_item", dataModel)

        headers = [
            "patient_item_id", "patient_id", "clinical_item_id", "item_date", "analyze_date"
        ]
        dataModels = [
            RowItemModel([-1, -11111, -4, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-2, -11111, -10, datetime(2000, 1, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-3, -11111, -8, datetime(2000, 1, 1, 2), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-4, -11111, -10, datetime(2000, 1, 2, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-5, -11111, -12, datetime(2000, 2, 1, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-10, -22222, -7, datetime(2000, 1, 5, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-12, -22222, -6, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-13, -22222, -11, datetime(2000, 1, 9, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-14, -33333, -6, datetime(2000, 2, 9, 0), datetime(2010, 1, 1, 0)], headers),
            RowItemModel([-15, -33333, -2, datetime(2000, 2, 11, 0), datetime(2010, 1, 1, 0)], headers),
        ]
        for dataModel in dataModels:
            DBUtil.findOrInsertItem("patient_item", dataModel)

        headers = [
            "clinical_item_id", "subsequent_item_id",
            "patient_count_0", "patient_count_3600", "patient_count_86400",
            "patient_count_604800", "patient_count_any",
            "time_diff_sum", "time_diff_sum_squares",
        ]
        dataModels = [
            RowItemModel([-1, -1, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
            RowItemModel([-2, -2, 30, 30, 30, 30, 30, 0.0, 0.0], headers),
            RowItemModel([-3, -3, 95, 95, 97, 97, 97, 0.0, 0.0], headers),
            RowItemModel([-4, -4, 40, 40, 40, 40, 40, 0.0, 0.0], headers),
            RowItemModel([-5, -5, 40, 40, 50, 50, 50, 0.0, 0.0], headers),
            RowItemModel([-6, -6, 70, 70, 70, 70, 70, 0.0, 0.0], headers),
            # Zero count associations, probably shouldn't even be here. If so, ignore them anyway
            RowItemModel([-2, -3, 0, 0, 0, 0, 0, 0.0, 0.0], headers),
            RowItemModel([-2, -4, 0, 2, 3, 3, 3, 200.0, 50000.0], headers),
            RowItemModel([-2, -6, 2, 2, 5, 5, 5, 300.0, 11990.0], headers),
            RowItemModel([-3, -1, 20, 23, 23, 23, 23, 400.0, 344990.0], headers),
            RowItemModel([-4, -5, 3, 3, 13, 43, 43, 340.0, 343110.0], headers),
            RowItemModel([-4, -6, 23, 33, 33, 33, 63, 420.0, 245220.0], headers),
            RowItemModel([-4, -7, 23, 33, 33, 33, 63, 40.0, 5420.0], headers),
            RowItemModel([-5, -4, 0, 0, 20, 20, 20, 540.0, 54250.0], headers),
            RowItemModel([-6, -2, 7, 7, 7, 7, 7, 1.0, 1.0], headers),
            RowItemModel([-6, -4, 20, 20, 20, 20, 20, 1.0, 1.0], headers),
        ]
        for dataModel in dataModels:
            # Add non patient_count variations (Adding 5 to values that are >5 and not for the zero time interval)
            for header in headers:
                if header.startswith("patient_count_"):
                    timeStr = header[len("patient_count_"):]
                    dataModel["count_%s" % timeStr] = dataModel[header]  # Copy over value
                    if timeStr != "0" and dataModel[header] > 5:
                        dataModel["count_%s" % timeStr] += 5
            DBUtil.findOrInsertItem("clinical_item_association", dataModel)

        # Indicate that cache data needs to be updated
        self.dataManager = DataManager()
        self.dataManager.clearCacheData("analyzedPatientCount")
        self.dataManager.clearCacheData("clinicalItemCountsUpdated")

        self.recommender = ItemAssociationRecommender()  # Instance to test on

    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        DBUtil.execute(
            "delete from clinical_item_association where clinical_item_id < 0")
        DBUtil.execute("delete from patient_item where patient_item_id < 0")
        DBUtil.execute("delete from clinical_item where clinical_item_id < 0")
        # Categories are deleted by the exact ids recorded during setUp
        DBUtil.execute(
            "delete from clinical_item_category where clinical_item_category_id in (%s)"
            % str.join(",", self.clinicalItemCategoryIdStrList))

        DBTestCase.tearDown(self)

    def test_recommender(self):
        """Run the recommender against the mock test data and verify the expected rankings."""
        query = RecommenderQuery()
        # Left at defaults here: queryItemIds, excludeItemIds, categoryIds, timeDeltaMax.
        # timeDeltaMax: if set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.),
        #   will count item associations that occurred within that time delta as
        #   co-occurrent.  If left blank, will just consider all items within a given
        #   patient as co-occurrent.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-3], headers),
            RowItemModel([-6], headers),
            RowItemModel([-5], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = set([-100])
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-3], headers),
            RowItemModel([-6], headers),
            RowItemModel([-5], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = set([-100])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-6], headers),
            RowItemModel([-5], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = set([-100])
        query.excludeItemIds = set([-6])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-5], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-4], headers),
            RowItemModel([-6], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-6], headers),
            RowItemModel([-4], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("General query with category limit")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set([-2, -4, -5, -6])
        query.timeDeltaMax = DELTA_HOUR
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-4], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("General query with specific exclusion")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set([-4, -3, -2])
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        headers = ["clinical_item_id"]
        expectedData = [
            RowItemModel([-6], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    def test_recommender_aggregation(self):
        """Test different scoring aggregation methods"""
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        # excludeItemIds, categoryIds and timeDeltaMax are left at their defaults
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # Default weighted aggregation method
        expectedData = [
            RowItemModel([-4, 0.3, 22.5], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Change to unweighted aggregation method
        query.aggregationMethod = "unweighted"
        expectedData = [
            RowItemModel([-4, 0.32857, 24.64286], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Change to Serial Bayes aggregation method
        query.aggregationMethod = "SerialBayes"
        expectedData = [
            RowItemModel([-4, 0.89157, 66.867471], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Naive Bayes aggregation
        query.aggregationMethod = "NaiveBayes"
        expectedData = [
            # Without truncating negative values
            # (with truncating negative values this row would be [-4, 0.8, 58.59707])
            RowItemModel([-4, 3.75, 281.25], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        # Apply value filter
        query.fieldFilters["freqRatio>"] = 10.0
        expectedData = [
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

    def assertEqualRecommendedData(self, expectedData, recommendedData, query):
        """Compare recommended items against expected items, pairwise and in order.

        Every key present in an expected item must match the recommended
        item (floats to 5 decimal places, everything else exactly), the
        recommended items must be in non-increasing order of
        query.sortField, and both lists must have the same length.
        Derived statistics named by the expected item's keys are populated
        on each recommended item before comparing.
        """
        lastScore = None
        for expectedItem, recommendedItem in zip(expectedData, recommendedData):
            # Ensure derived statistics are populated to enable comparisons
            ItemAssociationRecommender.populateDerivedStats(
                recommendedItem, expectedItem.keys())

            self.assertEqualDict(expectedItem, recommendedItem, ["clinical_item_id"])
            for key in list(expectedItem.keys()):
                # If specified, then verify a specific values
                if isinstance(expectedItem[key], float):
                    self.assertAlmostEqual(expectedItem[key], recommendedItem[key], 5)
                else:
                    self.assertEqual(expectedItem[key], recommendedItem[key])

            if lastScore is not None:
                # Verify descending order of scores
                self.assertLessEqual(recommendedItem[query.sortField], lastScore)
            lastScore = recommendedItem[query.sortField]

        # zip() stops at the shorter list, so the lengths are checked separately
        self.assertEqual(len(expectedData), len(recommendedData))

    def test_recommender_stats(self):
        """Run the recommender against the mock test data and verify expected stats calculations."""
        query = RecommenderQuery()
        query.parseParams(
            {
                "countPrefix": "patient_",
                "queryItemIds": "-6",
                "resultCount": "3",  # Just get top 3 ranks for simplicity
                "maxRecommendedId": "0",  # Artificial constraint to focus only on test data
                "sortField": "P-Fisher",  # Specifically request derived expected vs. observed stats
            }
        )

        log.debug("Query with single item not perturbed by others.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB",
            "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher"
        ]
        expectedData = [
            RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers),
            RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

        log.debug("Query for non-unique counts.")
        query.parseParams(
            {
                "countPrefix": "",
                "sortField": "oddsRatio",
            }
        )
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB",
            "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio"
        ]
        expectedData = [
            RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers),
            RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers),
        ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

    def assertEqualRecommendedDataStats(self, expectedData, recommendedData, headers):
        """Compare actual score / stat values of recommended items against expected items.

        The lists must have equal length, and for every name in headers the
        paired values must agree to 3 decimal places.  Derived statistics
        named in headers are populated on each recommended item first.
        """
        self.assertEqual(len(expectedData), len(recommendedData))
        for expectedItem, recommendedItem in zip(expectedData, recommendedData):
            # Ensure the recommendedData has all fields of interest populated / calculated
            ItemAssociationRecommender.populateDerivedStats(recommendedItem, headers)
            for header in headers:
                expectedValue = expectedItem[header]
                recommendedValue = recommendedItem[header]
                msg = 'Dicts diff with key (%s).  Verify = %s, Sample = %s' % (
                    header, expectedValue, recommendedValue)
                self.assertAlmostEqual(expectedValue, recommendedValue, 3, msg)

    def test_recommender_stats_commandline(self):
        """Verify expected stats calculations when run through the recommender's main(argv)."""
        # sys.stdout is replaced to capture the command-line output; keep the
        # real stream so it is restored even if an assertion fails.
        originalStdout = sys.stdout
        try:
            log.debug("Query with single item not perturbed by others.")
            headers = [
                "clinical_item_id", "N", "nB", "nA", "nAB",
                "conditionalFreq", "baselineFreq", "freqRatio", "P-Fisher"
            ]
            expectedData = [
                RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers),
                RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers),
            ]
            sys.stdout = StringIO()  # Redirect stdout output to collect test results
            argv = [
                "ItemRecommender.py",
                "maxRecommendedId=0&queryItemIds=-6&countPrefix=patient_&resultCount=3&sortField=P-Fisher",
                "-"
            ]
            self.recommender.main(argv)
            textOutput = StringIO(sys.stdout.getvalue())
            self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers)

            log.debug("Query for non-unique counts.")
            headers = [
                "clinical_item_id", "N", "nB", "nA", "nAB",
                "conditionalFreq", "baselineFreq", "freqRatio", "oddsRatio"
            ]
            expectedData = [
                RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers),
                RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers),
            ]
            sys.stdout = StringIO()  # Redirect stdout output to collect test results
            argv = [
                "ItemRecommender.py",
                "maxRecommendedId=0&queryItemIds=-6&countPrefix=&resultCount=3&sortField=oddsRatio",
                "-"
            ]
            self.recommender.main(argv)
            textOutput = StringIO(sys.stdout.getvalue())
            self.assertEqualRecommendedDataStatsTextOutput(expectedData, textOutput, headers)
        finally:
            sys.stdout = originalStdout

    def assertEqualRecommendedDataStatsTextOutput(self, expectedData, textOutput, headers):
        """Parse tab-delimited text output into rows and compare stat values against expectedData.

        Columns named in headers are converted to float before the rows are
        handed to assertEqualRecommendedDataStats.
        """
        recommendedData = list()
        for dataRow in TabDictReader(textOutput):
            # Snapshot the keys so values can be reassigned while looping
            for key in list(dataRow.keys()):
                if key in headers:
                    dataRow[key] = float(dataRow[key])  # Parse into numerical values for comparison
            recommendedData.append(dataRow)
        self.assertEqualRecommendedDataStats(expectedData, recommendedData, headers)

    def test_dataCache(self):
        """Test that repeating queries with cache turned on will not result in extra DB queries."""
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        # excludeItemIds, categoryIds and timeDeltaMax are left at their defaults
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        # First query without cache
        self.recommender.dataManager.dataCache = None
        baselineData = self.recommender(query)
        baselineQueryCount = self.recommender.dataManager.queryCount

        # Redo query with cache
        self.recommender.dataManager.dataCache = dict()
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)  # Ensure getting same results
        self.assertNotEqual(baselineQueryCount, newQueryCount)  # Expect needed more queries since no prior cache
        baselineQueryCount = newQueryCount

        # Again, but should be no new query since have cached results last time
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat multiple times, should still have no new query activity
        for iRepeat in range(10):
            newData = self.recommender(query)
            newQueryCount = self.recommender.dataManager.queryCount
            self.assertEqualRecommendedData(baselineData, newData, query)
            self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for subset should still yield no new query
        query.queryItemIds = set([-2])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData  # New baseline for subset
        self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

        # Repeat query for subset
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)  # Expect no queries for subsets

        # Query for partial subset, partial new
        query.queryItemIds = set([-5, -6])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData  # New baseline for subset
        # Expect no new queries for subsets, because first query should have done mass-all query
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat for partial subset, no longer new.  The baseline from the
        # previous call is kept so the repeated result is compared against
        # it rather than against itself.
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
class ItemRecommendationTable(BaseDynamicData):
    """Relay a recommender query taken from the request parameters and
    store the results back into the request data as HTML table content.
    """

    def __init__(self):
        """Register the default request parameters, the handler and the recommender."""
        BaseDynamicData.__init__(self)

        # Request parameter defaults, assigned in this order
        parameterDefaults = (
            ("queryItemIds", ""),
            ("targetItemIds", ""),
            ("excludeItemIds", ""),
            ("excludeCategoryIds", ""),
            ("timeDeltaMax", ""),
            ("sortField", "PPV"),
            ("sortReverse", "True"),
            ("resultCount", "10"),
            ("invertQuery", ""),
            ("showCounts", ""),
            ("countPrefix", ""),
            ("aggregationMethod", "weighted"),
            ("fieldHeaders", ""),
            ("dataRows", ""),
        )
        for paramName, paramDefault in parameterDefaults:
            self.requestData[paramName] = paramDefault

        self.addHandler("resultCount",
                        ItemRecommendationTable.action_default.__name__)

        # Recommender instance that answers the queries, sharing the web data cache
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

    def action_default(self):
        """Run the query described by the request data and fill in the
        "fieldHeaders" and "dataRows" request entries with HTML.
        """
        query = RecommenderQuery()
        query.parseParams(self.requestData)
        displayFields = query.getDisplayFields()

        resultRows = self.recommender(query)
        if len(resultRows) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(resultRows)

        # Format for HTML and add a control field for interaction with the data
        for resultRow in resultRows:
            self.prepareResultRow(resultRow, displayFields)

        # Header labels use the original field names, while the data columns
        # use the "Format"-suffixed versions of those names
        headerHTML, suffixedFields = self.prepareDisplayHeaders(displayFields)
        self.requestData["fieldHeaders"] = headerHTML

        colNames = [
            "controls", "rank", "name", "description", "category_description"
        ]
        colNames += suffixedFields

        formatter = HtmlResultsFormatter(StringIO(),
                                         valign="middle",
                                         align="center")
        formatter.formatResultDicts(resultRows, colNames)
        self.requestData["dataRows"] = formatter.getOutFile().getvalue()

    def prepareDisplayHeaders(self, displayFields):
        """Build the header cell HTML and the matching data column names.

        Returns a 2-tuple: the `<th>` markup for displayFields (followed by
        CORE_FIELDS when the "showCounts" request value is not one of
        FALSE_STRINGS), and the list of the same field names each with a
        "Format" suffix appended.
        """
        showCounts = (
            self.requestData["showCounts"].lower() not in FALSE_STRINGS)

        fieldHeadersHTML = (
            '<th nowrap>%s</th>' % '</th><th nowrap>'.join(displayFields))
        suffixedFields = ['%sFormat' % field for field in displayFields]

        if showCounts:
            fieldHeadersHTML += '<th>%s</th>' % '</th><th>'.join(CORE_FIELDS)
            suffixedFields.extend('%sFormat' % field for field in CORE_FIELDS)

        return (fieldHeadersHTML, suffixedFields)

    def prepareResultRow(self, dataModel, displayFields):
        """Add display-ready entries to one result row, in place.

        Sets "controls", rewrites commas in "name" as dashes, fills in any
        display field missing from the row using ContingencyStats, and adds
        a "<field>Format" string for the display fields and the CORE_FIELDS.
        """
        dataModel["controls"] = CONTROLS_TEMPLATE % dataModel
        dataModel["name"] = dataModel["name"].replace(",", "-")

        if "nAB" not in dataModel:
            # Baseline query without query items, use matching numbers to
            # ensure calculations will have something to process
            dataModel["nAB"] = dataModel["nB"]
            dataModel["nA"] = dataModel["N"]

        contStats = ContingencyStats(dataModel["nAB"], dataModel["nA"],
                                     dataModel["nB"], dataModel["N"])
        contStats.normalize(truncateNegativeValues=False)

        for field in displayFields:
            if field not in dataModel:
                # Unavailable field, take it from the derived statistics
                dataModel[field] = contStats[field]

            if field in CORE_FIELDS:
                # Count fields get their format in the loop below
                continue

            value = dataModel[field]
            formatKey = "%sFormat" % field
            if abs(value) < 0.01:
                # Exponent notation for very small values
                dataModel[formatKey] = "%.1e" % value
            elif value == sys.float_info.max:
                # Symbolic representation of very large value
                dataModel[formatKey] = "MaxOverflow"
            else:
                # Default just format as floating point values
                dataModel[formatKey] = "%.2f" % value

        for field in CORE_FIELDS:
            value = dataModel[field]
            if field in BASELINE_FIELDS:
                # Count fields express as integers
                template = "%d"
            elif value > 10:
                # May have small virtual counts from derived scenarios
                template = "%.1f"
            else:
                template = "%.2f"
            dataModel["%sFormat" % field] = template % value
def action_default(self):
    """Query the association recommender for related orders and store the
    results in the request data as HTML ("fieldHeaders" and "dataRows").
    """
    self.recommender = ItemAssociationRecommender()
    self.recommender.dataManager.dataCache = webDataCache

    query = RecommenderQuery()
    if self.requestData["sortField"] == "":
        # P-Fisher-NegLog should yield better results, but beware, much
        # longer to calculate
        self.requestData["sortField"] = "P-YatesChi2-NegLog"
    query.parseParams(self.requestData)

    # Fall back on the recommender's default exclusions when none were given
    if len(query.excludeItemIds) == 0:
        query.excludeItemIds = (
            self.recommender.defaultExcludedClinicalItemIds())
    if len(query.excludeCategoryIds) == 0:
        query.excludeCategoryIds = (
            self.recommender.defaultExcludedClinicalItemCategoryIds())

    # Comma-separated request value; blank means no extra display fields
    displayFieldsParam = self.requestData["displayFields"]
    displayFields = (
        displayFieldsParam.split(",") if displayFieldsParam != "" else list())

    resultRows = self.recommender(query)
    if len(resultRows) > 0:
        # Denormalize results with links to clinical item descriptions
        self.recommender.formatRecommenderResults(resultRows)

    # Header labels use the original field names, while the data columns
    # use the "Format"-suffixed versions of those names
    headerHTML, suffixedFields = self.prepareDisplayHeaders(displayFields)
    self.requestData["fieldHeaders"] = headerHTML

    # Format for HTML and add a control field for interaction with the data
    for resultRow in resultRows:
        self.prepareResultRow(resultRow, displayFields)

    # Try organize by category
    if self.requestData["groupByCategory"]:
        resultRows = self.recommender.organizeByCategory(resultRows)

    # Columns: controls, then the formatted display fields, then description
    colNames = ["controls"]
    colNames += suffixedFields
    colNames.append("description")

    htmlLines = []
    previousRow = None
    for resultRow in resultRows:
        newCategory = (
            previousRow is None or
            previousRow["category_description"] !=
            resultRow["category_description"])
        # Limit category display if many repeats
        showCategory = (self.requestData["groupByCategory"] and newCategory)
        if showCategory:
            htmlLines.append(CATEGORY_HEADER_TEMPLATE % resultRow)
        htmlLines.append(self.formatRowHTML(resultRow, colNames, showCategory))
        previousRow = resultRow

    self.requestData["dataRows"] = "\n".join(htmlLines)
order by ic.section, ic.name, ci.name; """ resultsTable = DBUtil.execute(existingReferenceOrderQuery) admitDxIdSectionGuidelineNameTuples = set() # Keep track of each guideline name set itemIdsByAdmitDxId = dict() for admitDxId, sectionName, guidelineName, itemId, itemName, itemDescription, itemCount in resultsTable: if admitDxId not in itemIdsByAdmitDxId: itemIdsByAdmitDxId[admitDxId] = set() itemIdsByAdmitDxId[admitDxId].add(itemId) admitDxIdSectionGuidelineNameTuples.add( (admitDxId, sectionName, guidelineName)) recommender = ItemAssociationRecommender() for admitDxId, itemIds in itemIdsByAdmitDxId.iteritems(): print >> sys.stderr, admitDxId, len(itemIds) recQuery = RecommenderQuery() recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds() recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds( ) recQuery.queryItemIds = [admitDxId] recQuery.timeDeltaMax = timedelta(1) # Within one day recQuery.countPrefix = "patient_" recQuery.limit = TOP_ITEM_COUNT # Top results by P-value recQuery.sortField = "P-YatesChi2-NegLog"