Пример #1
0
    def action_default(self):
        """Run the recommender for the parsed request and render result rows as HTML."""
        query = RecommenderQuery()
        query.parseParams(self.requestData)
        displayFields = query.getDisplayFields()

        results = self.recommender(query)

        # Denormalize results with links to clinical item descriptions
        if results:
            self.recommender.formatRecommenderResults(results)

        # Format for HTML and add a control field for interaction with the data
        for row in results:
            self.prepareResultRow(row, displayFields)

        # Display fields get a Format suffix to identify which version to display,
        # while the originals are used for the header labels
        (self.requestData["fieldHeaders"],
         suffixedFields) = self.prepareDisplayHeaders(displayFields)

        columns = [
            "controls", "rank", "name", "description", "category_description"
        ] + suffixedFields
        formatter = HtmlResultsFormatter(StringIO(), valign="middle", align="center")
        formatter.formatResultDicts(results, columns)

        self.requestData["dataRows"] = formatter.getOutFile().getvalue()
Пример #2
0
    def test_recommender_aggregation(self):
        """Verify recommendation scores under each scoring aggregation method option."""
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = {-2, -5}
        #query.excludeItemIds = set();
        #query.categoryIds = set();
        #query.timeDeltaMax = None;   # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent.  If left blank, will just consider all items within a given patient as co-occurrent.
        query.limit = 3  # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # Default weighted aggregation method
        expected = [
            RowItemModel([-4, 0.3, 22.5], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedData(expected, actual, query)

        # Change to unweighted aggregation method
        query.aggregationMethod = "unweighted"
        expected = [
            RowItemModel([-4, 0.32857, 24.64286], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedData(expected, actual, query)

        # Change to Serial Bayes aggregation method
        query.aggregationMethod = "SerialBayes"
        expected = [
            RowItemModel([-4, 0.89157, 66.867471], headers),
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedData(expected, actual, query)

        # Naive Bayes aggregation (without truncating negative values)
        query.aggregationMethod = "NaiveBayes"
        expected = [
            RowItemModel([-4, 3.75, 281.25], headers),
            #RowItemModel( [-4, 0.8,    58.59707], headers ),   # With truncating negative values
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedData(expected, actual, query)

        # Apply value filter
        query.fieldFilters["freqRatio>"] = 10.0
        expected = [
            RowItemModel([-6, 0.16667, 7.142857], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedData(expected, actual, query)
Пример #3
0
    def test_tripleSequence_virtualItem(self):
        """Outcome assessment where the target is a virtual item defined by a
        triple (instead of double) sequence of items.  Runs the recommender
        against the mock test data and verifies expected stats afterwards.
        """
        aQuery = AnalysisQuery()
        aQuery.patientIds = {-22222}
        aQuery.baseCategoryId = -7
        aQuery.queryTimeSpan = timedelta(0, 86400)
        aQuery.sequenceItemIdsByVirtualItemId[-16] = (-15, -14)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        aQuery.recommender = ItemAssociationRecommender()
        aQuery.baseRecQuery = RecommenderQuery()
        aQuery.baseRecQuery.targetItemIds = {-16}
        aQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = ["patient_id", "outcome.-16", "score.-16"]
        expected = [RowItemModel([-22222, +1, 0.14286], colNames)]
        actual = self.analyzer(aQuery)
        self.assertEqualStatResults(expected, actual, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-16=-15:-14", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-22222', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expected, textOutput, colNames)
Пример #4
0
    def main(self, argv):
        """Main method, callable from command line.

        Parses options, runs the topic-model recommendation analysis against
        the prepared input file, and writes stat summary results to the
        output file (or stdout when no output filename is given).
        """
        usageStr =  "usage: %prog [options] <inputFile> [<outputFile>]\n"+\
                    "   <inputFile>    Validation file in prepared result file format use generated LDA models to predict items and compare against verify sets similar to RecommendationClassficationAnalysis. \n"+\
                    "   <outputFile>   Validation result stat summaries.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option("-M", "--modelFile", dest="modelFile", help="Name of the file to load an LDA or HDP model and topic word document counts from.")
        parser.add_option("-X", "--excludeCategoryIds", dest="excludeCategoryIds", help="For recommendation, exclude / skip any items who fall under one of these comma-separated category Ids.")
        parser.add_option("-i", "--itemsPerCluster", dest="itemsPerCluster", default=DEFAULT_TOPIC_ITEM_COUNT, help="Specify number of top topic items to consider when scoring recommendations.")
        parser.add_option("-m", "--minClusterWeight", dest="minClusterWeight", default=DEFAULT_MIN_TOPIC_WEIGHT, help="When scoring recommendations, skip any topics with less than this relation weight (effectively scores as zero, but can avoid a lot of low yield calculations).")
        parser.add_option("-s", "--sortField", dest="sortField", default=DEFAULT_SORT_FIELD, help="Score field to sort top recommendations by.  Default to posterior probabilty 'totelItemWeight', but can also select 'lift' = 'tfidf' = 'interest' for TF*IDF style score weighting.")
        parser.add_option("-r", "--numRecs", dest="numRecs", default=DEFAULT_RECOMMENDED_ITEM_COUNT, help="Number of orders / items to recommend for comparison against the verification set. Alternative set option numRecsByOrderSet to look for key order set usage and size.")
        parser.add_option("-O", "--numRecsByOrderSet", dest="numRecsByOrderSet", action="store_true", help="If set, then look for an order_set_id column to find the key order set that triggered the evaluation time point to determine number of recommendations to consider.")
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) >= 1:
            query = AnalysisQuery()
            query.preparedPatientItemFile = stdOpen(args[0])
            query.recommender = TopicModelRecommender(options.modelFile)
            query.baseRecQuery = RecommenderQuery()
            if options.excludeCategoryIds is not None:
                # Parse comma-separated category IDs into a set of ints.
                # (Fixed: previously read the non-existent options.executeCategoryIds,
                # which raised AttributeError whenever -X was supplied.)
                query.baseRecQuery.excludeCategoryIds = set()
                for categoryIdStr in options.excludeCategoryIds.split(","):
                    query.baseRecQuery.excludeCategoryIds.add(int(categoryIdStr))
            else:   # Default exclusions if none specified
                query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds()
                query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds()
            query.baseRecQuery.itemsPerCluster = int(options.itemsPerCluster)
            query.baseRecQuery.minClusterWeight = float(options.minClusterWeight)

            query.baseRecQuery.sortField = options.sortField
            query.numRecommendations = int(options.numRecs)
            query.numRecsByOrderSet = options.numRecsByOrderSet

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            summaryData = {"argv": argv}
            print(COMMENT_TAG, json.dumps(summaryData), file=outputFile)

            formatter = TextResultsFormatter(outputFile)
            colNames = self.resultHeaders(query)
            formatter.formatTuple(colNames)  # Insert a mock record to get a header / label row
            formatter.formatResultDicts(analysisResults, colNames)
        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
    def test_recommenderAnalysis(self):
        """Run the recommender against the mock test data and verify the
        expected rank statistics afterwards."""
        aQuery = AnalysisQuery()
        aQuery.patientIds = {-11111}
        aQuery.recommender = BaselineFrequencyRecommender()
        #analysisQuery.recommender = ItemAssociationRecommender();
        aQuery.baseRecQuery = RecommenderQuery()
        aQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Don't use items whose default is to be excluded from recommendations
        #recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(conn=conn);
        #recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds(conn=conn);
        #recQuery.timeDeltaMax = timedelta(0, int(self.requestData["timeDeltaMax"]) );  # Time delta to use for queries, otherwise just default to all times

        colNames = [
            "patient_id", "clinical_item_id", "iItem", "iRecItem", "recRank",
            "recScore"
        ]

        # Start with default recommender.  Don't care about specific scores,
        # as long as ranks are correct.
        expected = [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),     #0.170),
            (-11111, -10, 1, 1, 4, SENTINEL_ANY_FLOAT),    #0.032),
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),     #0.025),
            (-11111, -12, 4, 3, 2, SENTINEL_ANY_FLOAT),    #0.053),
        ]
        actual = self.analyzer(aQuery)
        self.assertEqualTable(expected, actual, 3)

        # Now try targeted recommender
        aQuery.recommender = ItemAssociationRecommender()
        expected = [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),     #0.167),
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),    #0.304),
            (-11111, -8, 2, 2, 5, SENTINEL_ANY_FLOAT),     #0.190),
            (-11111, -12, 4, 3, 1, SENTINEL_ANY_FLOAT),    #0.444),
        ]
        actual = self.analyzer(aQuery)
        self.assertEqualTable(expected, actual, 3)

        # Repeat, but limit the number of query items and recommendations analyzed
        aQuery.queryItemMax = 2
        expected = [
            (-11111, -4, 0, 0, 1, SENTINEL_ANY_FLOAT),     #0.167),
            (-11111, -10, 1, 1, 2, SENTINEL_ANY_FLOAT),    #0.304),
        ]
        actual = self.analyzer(aQuery)
        self.assertEqualTable(expected, actual, 3)
Пример #6
0
    def test_recommender_stats(self):
        """Run the recommender against the mock test data and verify the
        derived expected-vs-observed stats calculations."""
        query = RecommenderQuery()
        query.parseParams({
            "countPrefix": "patient_",
            "queryItemIds": "-6",
            "resultCount": "3",       # Just get top 3 ranks for simplicity
            "maxRecommendedId": "0",  # Artificial constraint to focus only on test data
            "sortField": "P-Fisher",  # Specifically request derived expected vs. observed stats
        })

        log.debug("Query with single item not perturbed by others.")
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "P-Fisher"
        ]
        expected = [
            RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 7.0, 0.1, 0.0100, 10.0, 3.7e-06], headers),
            RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 20.0, 0.286, 0.0133, 21.42857, 1.2e-23], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedDataStats(expected, actual, headers)

        log.debug("Query for non-unique counts.")
        query.parseParams({
            "countPrefix": "",
            "sortField": "oddsRatio",
        })
        headers = [
            "clinical_item_id", "N", "nB", "nA", "nAB", "conditionalFreq",
            "baselineFreq", "freqRatio", "oddsRatio"
        ]
        expected = [
            RowItemModel([-4, SIMULATED_PATIENT_COUNT, 40.0, 70.0, 25.0, 0.35714, 0.01333, 26.7857, 107.96296], headers),
            RowItemModel([-2, SIMULATED_PATIENT_COUNT, 30.0, 70.0, 12.0, 0.1714, 0.01, 17.1429, 33.47126], headers),
        ]
        actual = self.recommender(query)
        self.assertEqualRecommendedDataStats(expected, actual, headers)
Пример #7
0
    def test_parsePreparedResultFile(self):
        """Run the analysis preparer against the mock test data and verify the
        text file output can be parsed back into the original object form."""
        # Key columns to verify
        colNames = [
            "patient_id",
            "baseItemId",
            "baseItemDate",
            "queryStartTime",
            "queryEndTime",
            "verifyEndTime",
            "queryItemCountById",
            "verifyItemCountById",
            "outcome.-33", "outcome.-32", "outcome.-31", "outcome.-30",
        ]

        # Initial run without time limits on outcome measure
        aQuery = AnalysisQuery()
        aQuery.patientIds = {-11111, -44444}
        aQuery.baseCategoryId = -7
        aQuery.queryTimeSpan = timedelta(0, 86400)
        aQuery.verifyTimeSpan = timedelta(0, 604800)
        aQuery.baseRecQuery = RecommenderQuery()
        aQuery.baseRecQuery.targetItemIds = {-33, -32, -31, -30}
        aQuery.baseRecQuery.excludeItemIds = [-13]
        aQuery.baseRecQuery.excludeCategoryIds = [-5]
        aQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data
        directResults = list(self.analyzer(aQuery))

        # Redo but run through command-line interface
        sys.stdout = StringIO()  # Redirect stdout output to collect test results
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V",
            "604800", "-o", "-33,-32,-31,-30", '0,-11111,-44444', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        textBasedResults = list(self.analyzer.parsePreparedResultFile(textOutput))

        self.assertEqualResultDicts(directResults, textBasedResults, colNames)
Пример #8
0
    def action_default(self):
        """Look for related orders by association / recommender methods"""
        # If patient is specified then modify query and exclusion list based on
        # items already ordered for the patient
        recentItemIds = set()
        if self.requestData["sim_patient_id"]:
            patientId = int(self.requestData["sim_patient_id"])
            simTime = int(self.requestData["sim_time"])

            # Track recent item IDs (orders, diagnoses, unlocked results, etc.
            # that related order queries will be based off of)
            recentItemIds = SimManager().recentItemIds(patientId, simTime)

        # Recommender instance to test on; allow caching of data for rapid successive queries
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        query = RecommenderQuery()
        if self.requestData["sortField"] == "":
            # P-Fisher-NegLog should yield better results, but beware, much longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(self.requestData)
        if not query.excludeItemIds:
            query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
        if not query.excludeCategoryIds:
            query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()
        #query.fieldList.extend( ["prevalence","PPV","RR"] );
        displayFields = []
        if self.requestData["displayFields"] != "":
            displayFields = self.requestData["displayFields"].split(",")

        # Exclude items already ordered for the patient from any recommended list
        query.excludeItemIds.update(recentItemIds)
        if not query.queryItemIds:  # No specific query items given, so use the recent patient item IDs
            query.queryItemIds.update(recentItemIds)

        recommendedData = self.recommender(query)

        if recommendedData:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields should append Format suffix to identify which version
        # to display, but use original for header labels
        (self.requestData["fieldHeaders"],
         suffixedFields) = self.prepareDisplayHeaders(displayFields)

        # Format for HTML and add a control field for interaction with the data
        for dataModel in recommendedData:
            self.prepareResultRow(dataModel, displayFields)

        # Try organize by category
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(recommendedData)

        colNames = ["controls"]  # "name" for code. ,"category_description"
        colNames.extend(suffixedFields)
        colNames.append("description")

        prevModel = None
        htmlLines = []
        for dataModel in recommendedData:
            isNewCategory = (prevModel is None or
                             prevModel["category_description"] != dataModel["category_description"])
            # Limit category display if many repeats
            showCategory = self.requestData["groupByCategory"] and isNewCategory
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
            htmlLines.append(self.formatRowHTML(dataModel, colNames, showCategory))
            prevModel = dataModel
        self.requestData["dataRows"] = "\n".join(htmlLines)
Пример #9
0
    def main(self, argv):
        """Main method, callable from command line"""
        usageStr = ("usage: %prog [options] <inputFile> [<outputFile>]\n"
                    "   <inputFile>    Validation file in prepared result file format.  Predict items and compare against verify sets similar to RecommendationClassficationAnalysis. \n"
                    "   <outputFile>   Validation result stat summaries.\n")
        parser = OptionParser(usage=usageStr)
        parser.add_option("-r", "--numRecs", dest="numRecs",
                          default=DEFAULT_RECOMMENDED_ITEM_COUNT,
                          help="Number of orders / items to recommend for comparison against the verification set, sorted in prevalence order.  If skip or set <1, then will use all order set items found.")
        parser.add_option("-O", "--numRecsByOrderSet", dest="numRecsByOrderSet",
                          action="store_true",
                          help="If set, then look for an order_set_id column to find the key order set that triggered the evaluation time point to determine number of recommendations to consider.")
        parser.add_option("-s", "--sortField", dest="sortField",
                          default=DEFAULT_SORT_FIELD,
                          help="Allow overriding of default sort field when returning ranked results (patient_count, name, description, etc.)")
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + " ".join(argv))
        timer = time.time()

        # Guard clause: require at least an input file argument
        if len(args) < 1:
            parser.print_help()
            sys.exit(-1)

        query = AnalysisQuery()
        query.preparedPatientItemFile = stdOpen(args[0])
        query.recommender = OrderSetRecommender()
        query.baseRecQuery = RecommenderQuery()
        # Default exclusions if none specified
        query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds()
        query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds()
        query.baseRecQuery.sortField = options.sortField
        query.numRecommendations = int(options.numRecs)
        query.numRecsByOrderSet = options.numRecsByOrderSet

        # Run the actual analysis
        analysisResults = self(query)

        # Format the results for output
        outputFilename = args[1] if len(args) > 1 else None
        outputFile = stdOpen(outputFilename, "w")

        # Print comment line with analysis arguments to allow for deconstruction later
        summaryData = {"argv": argv}
        print(COMMENT_TAG, json.dumps(summaryData), file=outputFile)

        formatter = TextResultsFormatter(outputFile)
        colNames = self.resultHeaders(query)
        formatter.formatTuple(colNames)  # Insert a mock record to get a header / label row
        formatter.formatResultDicts(analysisResults, colNames)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
    def test_recommenderAnalysis(self):
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 3;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = BaselineFrequencyRecommender();
        #analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data

        # Don't use items whose default is to be excluded from recommendations
        analysisQuery.baseRecQuery.excludeCategoryIds = analysisQuery.recommender.defaultExcludedClinicalItemCategoryIds();
        analysisQuery.baseRecQuery.excludeItemIds = analysisQuery.recommender.defaultExcludedClinicalItemIds();
        #recQuery.timeDeltaMax = timedelta(0, int(self.requestData["timeDeltaMax"]) );  # Time delta to use for queries, otherwise just default to all times

        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "normalRecall","normalPrecision", "ROC-AUC"];
        
        # Start with default recommender
        expectedResults = [ RowItemModel([-11111,  1,2,3,  0.333, 0.25, 0.286,  0.208, 0.254, 0.333/1.0, 0.25/0.75, 0.524], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line interface
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","BaselineFrequencyRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","BaselineFrequencyRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);


        
       
        # Now try targeted recommender
        analysisQuery.recommender = ItemAssociationRecommender();
        expectedResults = [ RowItemModel([-11111,  1,2,3,  0.333, 0.25, 0.286,  0.347, 0.293, 0.333, 0.25/0.75, 0.6666], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Now try multiple query items targeted recommender
        analysisQuery.numQueryItems = 2;
        expectedResults = [ RowItemModel([-11111, 1, 2, 3,  0.333, 0.25, 0.286,  0.254, 0.194, 0.333, 0.25/0.75, 0.4167], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","2","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","2","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # More query items with aggregation options
        analysisQuery.numQueryItems = 3;
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.333,  0.517, 0.194, 0.5, 0.25/0.5, 0.4166], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);






        # Value filters
        analysisQuery.baseRecQuery.sortField= "freqRatio";
        analysisQuery.baseRecQuery.fieldFilters["freqRatio>"] = 70;
        expectedResults = [ RowItemModel([-11111, 2, 0, 2,   1.0, 0.5, 0.6666,  1.0, 0.446, 1.0, 0.5/0.5, 0.375], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);
        del analysisQuery.baseRecQuery.fieldFilters["freqRatio>"];  # Undo to not affect subsequent queries

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-f","freqRatio>:70.0","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-s","freqRatio","-f","freqRatio>:70.0",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);





        # Unweighted aggregation
        analysisQuery.baseRecQuery.weightingMethod = "unweighted";
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.3333,  0.517, 0.194, 0.5, 0.25/0.5, 0.25], colNames ) ];
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-q","3","-v","3","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","3","-v","3",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-s","freqRatio","-P","-r","4","-m","0","-R","ItemAssociationRecommender","-a","unweighted",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Run by equivalent query time span selection rather than explicit counts
        colNames = ["patient_id", "baseItemId", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, -4, 1, 1, 3,   0.5, 0.25, 0.333,  0.517, 0.194, 0.4167], colNames ) ];

        analysisQuery.baseRecQuery.sortField= "conditionalFreq";
        analysisQuery.numQueryItems = None;
        analysisQuery.numVerifyItems = None;
        analysisQuery.baseCategoryId = -1;
        analysisQuery.queryTimeSpan = timedelta(0,3*60*60);
        analysisQuery.verifyTimeSpan = timedelta(50,0);
        analysisQuery.numRecommendations = 4;
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-c","-1","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-c","-1","-Q","5400","-V","4320000",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # Run by query time span by identifying base clinical item, rather than a general category
        analysisQuery.numQueryItems = None;
        analysisQuery.numVerifyItems = None;
        analysisQuery.baseCategoryId = None;    # Clear prior setting
        analysisQuery.baseItemId = -4;
        analysisQuery.queryTimeSpan = timedelta(0,3*60*60);
        analysisQuery.verifyTimeSpan = timedelta(50,0);
        analysisQuery.numRecommendations = 4;
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-b","-4","-Q","5400","-V","4320000","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-b","-4","-Q","5400","-V","4320000",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);




        # Basic then Filter test data date range
        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, 1, 1, 3,   0.5, 0.25, 0.33333,  0.4375, 0.29319, 0.66667], colNames ) ];
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 2;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","2",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);



        # Date Filters
        colNames = ["patient_id", "TP", "FN", "FP",  "recall", "precision", "F1-score", "weightRecall","weightPrecision", "ROC-AUC"];
        expectedResults = [ RowItemModel([-11111, 0, 1, 2,   0.0, 0.0, 0.0,  0.0, 0.0, None], colNames ) ];
        analysisQuery = AnalysisQuery();
        analysisQuery.patientIds = set([-11111]);
        analysisQuery.numQueryItems = 1;
        analysisQuery.numVerifyItems = 2;
        analysisQuery.numRecommendations = 4;
        analysisQuery.recommender = ItemAssociationRecommender();
        analysisQuery.baseRecQuery = RecommenderQuery();
        analysisQuery.baseRecQuery.maxRecommendedId = 0; # Restrict to test data
        analysisQuery.startDate = datetime(2000,1,1,1);
        analysisQuery.endDate = datetime(2000,1,10);
        analysisResults = self.analyzer(analysisQuery);
        self.assertEqualStatResults(expectedResults, analysisResults, colNames);

        # Redo with command-line
        sys.stdout = StringIO();    # Redirect stdout output to collect test results
        argv = ["RecommendationClassificationAnalysis.py","-q","1","-v","2","-r","4","-m","0","-S","2000-01-01 01:00:00","-E","2000-01-10","-R","ItemAssociationRecommender",'0,-11111',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);

        # Redo through prepared file intermediary
        sys.stdout = StringIO();    
        argv = ["PreparePatientItems.py","-q","1","-v","2","-S","2000-01-01 01:00:00","-E","2000-01-10",'0,-11111',"-"];
        self.preparer.main(argv);
        preparedDataFile = StringIO(sys.stdout.getvalue());

        sys.stdin = preparedDataFile;   # Read prepared data file from redirected stdin
        sys.stdout = StringIO();
        argv = ["RecommendationClassificationAnalysis.py","-P","-r","4","-m","0","-R","ItemAssociationRecommender",'-',"-"];
        self.analyzer.main(argv);
        textOutput = StringIO(sys.stdout.getvalue());
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput, colNames);
Пример #11
0
    def action_default(self):
        """Look for related orders by association / recommender methods"""
        # Recommender instance to test on, sharing the web-level data cache
        self.recommender = ItemAssociationRecommender()
        self.recommender.dataManager.dataCache = webDataCache

        query = RecommenderQuery()
        if self.requestData["sortField"] == "":
            # Default sort score.  P-Fisher-NegLog should yield better results,
            # but beware, much longer to calculate
            self.requestData["sortField"] = "P-YatesChi2-NegLog"
        query.parseParams(self.requestData)
        if len(query.excludeItemIds) == 0:
            query.excludeItemIds = self.recommender.defaultExcludedClinicalItemIds()
        if len(query.excludeCategoryIds) == 0:
            query.excludeCategoryIds = self.recommender.defaultExcludedClinicalItemCategoryIds()
        #query.fieldList.extend( ["prevalence","PPV","RR"] );
        fieldSpec = self.requestData["displayFields"]
        displayFields = fieldSpec.split(",") if fieldSpec != "" else list()

        recommendedData = self.recommender(query)

        if len(recommendedData) > 0:
            # Denormalize results with links to clinical item descriptions
            self.recommender.formatRecommenderResults(recommendedData)

        # Display fields get a Format suffix to select which rendered version to
        # show, while the original names label the column headers
        (self.requestData["fieldHeaders"],
         displayFieldsFormatSuffixed) = self.prepareDisplayHeaders(displayFields)

        # HTML formatting plus a control field for user interaction with each row
        for dataModel in recommendedData:
            self.prepareResultRow(dataModel, displayFields)

        # Optionally regroup the result rows under their clinical categories
        if self.requestData["groupByCategory"]:
            recommendedData = self.recommender.organizeByCategory(recommendedData)

        colNames = ["controls"]
        # "name" for code. ,"category_description"
        colNames.extend(displayFieldsFormatSuffixed)
        colNames.append("description")

        htmlLines = list()
        priorModel = None
        for dataModel in recommendedData:
            changedCategory = (priorModel is None
                               or priorModel["category_description"] !=
                               dataModel["category_description"])
            # Only emit a category header row when grouping and the category
            # just changed, to limit repeated category display
            showCategory = (self.requestData["groupByCategory"]
                            and changedCategory)
            if showCategory:
                htmlLines.append(CATEGORY_HEADER_TEMPLATE % dataModel)
            htmlLines.append(
                self.formatRowHTML(dataModel, colNames, showCategory))
            priorModel = dataModel
        self.requestData["dataRows"] = "\n".join(htmlLines)
Пример #12
0
    def test_recommenderAnalysis(self):
        """Run ItemAssociationRecommender against the mock test data and verify
        expected outcome-prediction statistics three ways for each scenario:
        direct analyzer call, command-line interface, and a prepared-file
        intermediary.  Scenarios vary the outcome time limit (none, 1 week,
        2 days) on the recommendation query.
        """
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        #analysisQuery.recommender = BaselineFrequencyRecommender();
        analysisQuery.recommender = ItemAssociationRecommender()
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.maxRecommendedId = 0
        # Restrict to test data

        # Initial run without time limits on outcome measure
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +1, 0.222, +1, 0.222],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-o",
            "-33,-32,-31,-30", "-m", "0", "-R", "ItemAssociationRecommender",
            '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary: first generate the prepared
        # patient-item data, then feed it back in via redirected stdin
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Now try with time limitation on outcome measure
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)
        # 1 week
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, +0, 0.222, +2, 0.611, +0, 0.222, +1, 0.222],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t",
            "604800", "-o", "-33,-32,-31,-30", "-m", "0", "-R",
            "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary with the same 1-week limit
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", "-t", "604800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Again, but with much stricter time limit (negative test case)
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 172800)
        # 2 day
        colNames = [
            "patient_id", "outcome.-33", "score.-33", "outcome.-32",
            "score.-32", "outcome.-31", "score.-31", "outcome.-30", "score.-30"
        ]
        expectedResults = [
            RowItemModel([-11111, 0, 0.0109, 2, 0.0600, 0, 0.0109, 0, 0.0109],
                         colNames)
        ]
        analysisResults = self.analyzer(analysisQuery)
        self.assertEqualStatResults(expectedResults, analysisResults, colNames)

        # Redo but run through command-line interface
        sys.stdout = StringIO()
        # Redirect stdout output to collect test results
        argv = [
            "OutcomePredictionAnalysis.py", "-c", "-7", "-Q", "86400", "-t",
            "172800", "-o", "-33,-32,-31,-30", "-m", "0", "-R",
            "ItemAssociationRecommender", '0,-11111', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)

        # Redo through prepared file intermediary with the same 2-day limit
        sys.stdout = StringIO()
        argv = [
            "PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V", "86400",
            "-t", "172800", "-o", "-33,-32,-31,-30", '0,-11111', "-"
        ]
        self.preparer.main(argv)
        preparedDataFile = StringIO(sys.stdout.getvalue())

        sys.stdin = preparedDataFile
        # Read prepared data file from redirected stdin
        sys.stdout = StringIO()
        argv = [
            "OutcomePredictionAnalysis.py", "-P", "-m", "0", "-R",
            "ItemAssociationRecommender", "-t", "172800", '-', "-"
        ]
        self.analyzer.main(argv)
        textOutput = StringIO(sys.stdout.getvalue())
        self.assertEqualStatResultsTextOutput(expectedResults, textOutput,
                                              colNames)
Пример #13
0
    def main(self, argv):
        """Main method, callable from command line.

        Parses command-line options into an AnalysisQuery (either querying the
        database for patient data or reading a prepared patient-item file),
        runs the outcome-prediction analysis, and writes tab-delimited results
        (preceded by a comment line recording argv) to the named output file
        or stdout.
        """
        usageStr =  "usage: %prog [options] <recommenderName> <patientIds> [<outputFile>]\n"+\
                    "   <patientIds/dataFile>    Name of file with patient ids.  If not found, then interpret as comma-separated list of test Patient IDs to prepare analysis data for.  Alternatively, provide preparedPatientItemFile generated from PreparePatientItems as input.\n"+\
                    "   <outputFile>    If query yields a result set, then that will be output\n"+\
                    "                       to the named file.  Leave blank or specify \"-\" to send to stdout.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-c",
            "--baseCategoryId",
            dest="baseCategoryId",
            help=
            "ID of clinical item category to look for initial items from (probably the ADMIT Dx item)."
        )
        parser.add_option(
            "-Q",
            "--queryTimeSpan",
            dest="queryTimeSpan",
            help=
            "Time frame specified in seconds over which to look for initial query items (e.g., 24hrs = 86400) after the base item found from the category above.  Start the time counting from the first item time occuring after the category item above since the ADMIT Dx items are often keyed to dates only without times (defaulting to midnight of the date specified)."
        )
        parser.add_option(
            "-o",
            "--outcomeItemIds",
            dest="outcomeItemIds",
            help=
            "Comma separated list of outcome item IDs to get prediction / recommendation scores for, as well as to label whether they actually appeared for the given patients.  Can specify virtual items representing the end of item triples (e.g., 5-Readmission being the end of any item followed by 3591-Discharge then 3671-Admit), by adding the component items in expected sequence.  For example, '5=3591:3671'"
        )
        parser.add_option(
            "-t",
            "--timeDeltaMax",
            dest="timeDeltaMax",
            help=
            "Time delta in seconds maximum by which recommendations should be based on.  Defaults to recommending items that occur at ANY time after the key orders.  If provided, will apply limits to only orders placed within 0 seconds, 1 hour (3600), 1 day (86400), or 1 week (604800) of the key orders / items.  If set, will also only count presence of labeled target items if occurs within the given time delta of the first query item."
        )

        parser.add_option(
            "-P",
            "--preparedPatientItemFile",
            dest="preparedPatientItemFile",
            action="store_true",
            help=
            "If set, will expect primary argument to instead be name of file to read input data from, instead of using above parameters to query from database."
        )

        parser.add_option(
            "-R",
            "--recommender",
            dest="recommender",
            help=
            "Name of the recommender to run the analysis against.  Options: %s"
            % list(RECOMMENDER_CLASS_BY_NAME.keys()))
        parser.add_option(
            "-S",
            "--scoreField",
            dest="scoreField",
            help=
            "Name of (derived) field to score items by.  For example, 'conditionalFreq.'"
        )
        parser.add_option(
            "-p",
            "--countPrefix",
            dest="countPrefix",
            help=
            "Which counting method to use for item associations.  Defaults to counting item occurrences, allowing for duplicates.  Additional options include: %s."
            % list(COUNT_PREFIX_OPTIONS))
        parser.add_option(
            "-a",
            "--aggregationMethod",
            dest="aggregationMethod",
            help=
            "Aggregation method to use for recommendations based off multiple query items.  Options: %s."
            % list(AGGREGATOR_OPTIONS))
        parser.add_option(
            "-s",
            "--skipIfOutcomeInQuery",
            dest="skipIfOutcomeInQuery",
            action="store_true",
            help=
            "If set, will skip patients where the outcome item occurs during the query period since that would defy the point of predicting the outcome."
        )
        parser.add_option(
            "-m",
            "--maxRecommendedId",
            dest="maxRecommendedId",
            help=
            "Specify a maximum ID value to accept for recommended items.  More used to limit output in test cases"
        )
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) > 0:
            # Parse out the query parameters
            query = AnalysisQuery()
            # Instantiate the recommender class named on the command line
            query.recommender = RECOMMENDER_CLASS_BY_NAME[
                options.recommender]()
            query.recommender.dataManager.dataCache = dict()
            # Use local cache to speed up repeat queries

            query.baseRecQuery = RecommenderQuery()
            if options.preparedPatientItemFile:
                # Don't reconstruct validation data through database, just read off validation file
                query.preparedPatientItemFile = stdOpen(args[0])
            else:

                patientIdsParam = args[0]
                try:
                    # Try to open patient IDs as a file
                    patientIdFile = stdOpen(patientIdsParam)
                    query.patientIds = set(patientIdFile.read().split())
                except IOError:
                    # Unable to open as a filename, then interpret as simple comma-separated list
                    query.patientIds = set(patientIdsParam.split(","))

                query.baseCategoryId = int(options.baseCategoryId)
                # Category to look for clinical item to start accruing query items from
                query.queryTimeSpan = timedelta(0, int(options.queryTimeSpan))

                query.baseRecQuery.targetItemIds = set()

                # Parse outcome item IDs; entries like '5=3591:3671' define a
                # virtual item (5) identified by a sequence of component items
                outcomeIdStrList = options.outcomeItemIds.split(",")
                for outcomeIdStr in outcomeIdStrList:
                    outcomeIdComponents = outcomeIdStr.split("=")
                    outcomeId = int(outcomeIdComponents[0])
                    query.baseRecQuery.targetItemIds.add(outcomeId)
                    if len(outcomeIdComponents) > 1:
                        sequenceIds = [
                            int(seqIdStr)
                            for seqIdStr in outcomeIdComponents[1].split(":")
                        ]
                        query.sequenceItemIdsByVirtualItemId[
                            outcomeId] = tuple(sequenceIds)

            # Optional recommendation-query tuning parameters
            if options.timeDeltaMax is not None:
                query.baseRecQuery.timeDeltaMax = timedelta(
                    0, int(options.timeDeltaMax))
            if options.scoreField is not None:
                query.baseRecQuery.sortField = options.scoreField
            if options.countPrefix is not None:
                query.baseRecQuery.countPrefix = options.countPrefix
            if options.aggregationMethod is not None:
                query.baseRecQuery.aggregationMethod = options.aggregationMethod
            if options.maxRecommendedId is not None:
                query.baseRecQuery.maxRecommendedId = int(
                    options.maxRecommendedId)

            if options.skipIfOutcomeInQuery is not None:
                query.skipIfOutcomeInQuery = options.skipIfOutcomeInQuery

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            print(COMMENT_TAG, json.dumps({"argv": argv}), file=outputFile)

            colNames = self.analysisHeaders(query)
            analysisResults.insert(0, RowItemModel(colNames, colNames))
            # Insert a mock record to get a header / label row

            formatter = TextResultsFormatter(outputFile)
            formatter.formatResultDicts(analysisResults, colNames)

        else:
            # No positional arguments given: show usage and exit with error status
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Пример #14
0
    def main(self, argv):
        """Main method, callable from command line"""
        usageStr =  "usage: %prog [options] <patientIds/dataFile> [<outputFile>]\n"+\
                    "   <patientIds/dataFile>    Name of file with patient ids.  If not found, then interpret as comma-separated list of test Patient IDs to prepare analysis data for.  Alternatively, provide preparedPatientItemFile generated from PreparePatientItems as input.\n"+\
                    "   <outputFile>    If query yields a result set, then that will be output\n"+\
                    "                       to the named file.  Leave blank or specify \"-\" to send to stdout.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-q",
            "--numQuery",
            dest="numQuery",
            help=
            "Number of orders / items from each patient to use as query items to prime the recommendations.  If set to a float number in (0,1), then treat as a percentage of the patient's total orders / items"
        )
        parser.add_option(
            "-v",
            "--numVerify",
            dest="numVerify",
            help=
            "Number of orders / items from each patient after the query items to use to validate recommendations.  If set to a float number in (0,1), then treat as a percentage of the patient's total orders / items.  If left unset, then just use all remaining orders / items for that patient"
        )
        parser.add_option(
            "-c",
            "--baseCategoryId",
            dest="baseCategoryId",
            help=
            "Instead of specifying first nQ query items, specify ID of clinical item category to look for initial items from (probably the ADMIT Dx item)."
        )
        parser.add_option(
            "-b",
            "--baseItemId",
            dest="baseItemId",
            help=
            "Instead of specifying first nQ query items, specify ID of the specific clinical item to look for initial items from."
        )
        parser.add_option(
            "-S",
            "--startDate",
            dest="startDate",
            help="Only look for test data occuring on or after this start date."
        )
        parser.add_option(
            "-E",
            "--endDate",
            dest="endDate",
            help="Only look for test data occuring before this end date.")
        parser.add_option(
            "-Q",
            "--queryTimeSpan",
            dest="queryTimeSpan",
            help=
            "Time frame specified in seconds over which to look for initial query items (e.g., 24hrs = 86400) after the base item found from the category above.  Start the time counting from the first item time occuring after the category item above since the ADMIT Dx items are often keyed to dates only without times (defaulting to midnight of the date specified)."
        )
        parser.add_option(
            "-V",
            "--verifyTimeSpan",
            dest="verifyTimeSpan",
            help=
            "Time frame specified in seconds over which to look for verify items after initial query item time.  Will ignore the query items that occur within the queryTimeSpan."
        )

        parser.add_option(
            "-P",
            "--preparedPatientItemFile",
            dest="preparedPatientItemFile",
            action="store_true",
            help=
            "If set, will expect primary argument to instead be name of file to read input data from, instead of using above parameters to query from database."
        )

        parser.add_option(
            "-R",
            "--recommender",
            dest="recommender",
            help=
            "Name of the recommender to run the analysis against.  Options: %s"
            % list(RECOMMENDER_CLASS_BY_NAME.keys()))
        parser.add_option(
            "-r",
            "--numRecs",
            dest="numRecs",
            help=
            "Number of orders / items to recommend for comparison against the verification set. Alternative set option numRecsByOrderSet to look for key order set usage and size."
        )
        parser.add_option(
            "-O",
            "--numRecsByOrderSet",
            dest="numRecsByOrderSet",
            action="store_true",
            help=
            "If set, then look for an order_set_id column to find the key order set that triggered the evaluation time point to determine number of recommendations to consider."
        )
        parser.add_option(
            "-s",
            "--sortField",
            dest="sortField",
            help=
            "Allow overriding of default sort field when returning ranked results"
        )
        parser.add_option(
            "-f",
            "--fieldFilters",
            dest="fieldFilters",
            help=
            "Filters to exclude results.  Comma-separated separated list of field-op:value exclusions where op is either < or > like, conditionalFreq<:0.1,frqeRatio<:1"
        )
        parser.add_option(
            "-t",
            "--timeDeltaMax",
            dest="timeDeltaMax",
            help=
            "If set, represents a time delta in seconds maximum by which recommendations should be based on.  Defaults to recommending items that occur at ANY time after the key orders.  If provided, will apply limits to only orders placed within 0 seconds, 1 hour (3600), 1 day (86400), or 1 week (604800) of the key orders / items."
        )
        parser.add_option(
            "-a",
            "--aggregationMethod",
            dest="aggregationMethod",
            help=
            "Aggregation method to use for recommendations based off multiple query items.  Options: %s."
            % list(AGGREGATOR_OPTIONS))
        parser.add_option(
            "-p",
            "--countPrefix",
            dest="countPrefix",
            help=
            "Prefix for how to do counts.  Blank for default item counting allowing repeats, otherwise ignore repeats for patient_ or encounter_"
        )
        parser.add_option(
            "-m",
            "--maxRecommendedId",
            dest="maxRecommendedId",
            help=
            "Specify a maximum ID value to accept for recommended items.  More used to limit output in test cases"
        )

        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) >= 1:
            # Parse out the query parameters
            query = AnalysisQuery()
            query.recommender = RECOMMENDER_CLASS_BY_NAME[
                options.recommender]()
            query.recommender.dataManager.dataCache = dict()
            # Use a dataCache to facilitate repeat queries

            if options.preparedPatientItemFile:
                # Don't reconstruct validation data through database, just read off validation file
                query.preparedPatientItemFile = stdOpen(args[0])
            else:
                patientIdsParam = args[0]
                try:
                    # Try to open patient IDs as a file
                    patientIdFile = stdOpen(patientIdsParam)
                    query.patientIds = set(patientIdFile.read().split())
                except IOError:
                    # Unable to open as a filename, then interpret as simple comma-separated list
                    query.patientIds = set(patientIdsParam.split(","))

                if options.numQuery is not None:
                    query.numQueryItems = int(options.numQuery)
                    query.numVerifyItems = int(options.numVerify)
                else:
                    # Alternative to specify query time span starting from a key category
                    query.queryTimeSpan = timedelta(0,
                                                    int(options.queryTimeSpan))
                    query.verifyTimeSpan = timedelta(
                        0, int(options.verifyTimeSpan))

                if options.baseCategoryId is not None or options.baseItemId is not None:
                    if options.baseCategoryId is not None:
                        query.baseCategoryId = int(options.baseCategoryId)
                        # Category to look for clinical item to start accruing query items from
                    if options.baseItemId is not None:
                        query.baseItemId = int(options.baseItemId)

                if options.startDate is not None:
                    query.startDate = DBUtil.parseDateValue(options.startDate)
                if options.endDate is not None:
                    query.endDate = DBUtil.parseDateValue(options.endDate)

            query.baseRecQuery = RecommenderQuery()
            query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds(
            )
            query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds(
            )
            if options.timeDeltaMax is not None and len(
                    options.timeDeltaMax) > 0:
                query.baseRecQuery.timeDeltaMax = timedelta(
                    0, int(options.timeDeltaMax))
            if options.aggregationMethod is not None:
                query.baseRecQuery.aggregationMethod = options.aggregationMethod
            if options.countPrefix is not None:
                query.baseRecQuery.countPrefix = options.countPrefix
            if options.maxRecommendedId is not None:
                query.baseRecQuery.maxRecommendedId = int(
                    options.maxRecommendedId)
            if options.sortField is not None:
                query.baseRecQuery.sortField = options.sortField
            if options.fieldFilters is not None:
                for fieldFilterStr in options.fieldFilters.split(","):
                    (fieldOp, valueStr) = fieldFilterStr.split(":")
                    query.baseRecQuery.fieldFilters[fieldOp] = float(valueStr)

            if options.numRecs is not None:
                query.numRecommendations = int(options.numRecs)
            else:
                # No recommendation count specified, then just use the same as the verify number
                query.numRecommendations = query.numVerifyItems
            query.numRecsByOrderSet = options.numRecsByOrderSet

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            summaryData = {
                "argv": argv
            }
            print(COMMENT_TAG, json.dumps(summaryData), file=outputFile)

            formatter = TextResultsFormatter(outputFile)
            colNames = self.resultHeaders(query)
            formatter.formatTuple(colNames)
            # Insert a mock record to get a header / label row
            formatter.formatResultDicts(analysisResults, colNames)

        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Пример #15
0
    def test_dataCache(self):
        """Verify that enabling the dataManager cache avoids repeat DB queries.

        Issues the same recommender query with and without a dataCache and
        tracks dataManager.queryCount: the first cached run still needs
        queries, but repeats, subset queries, and partial-subset queries
        should add none afterwards.
        """
        query = RecommenderQuery()
        query.countPrefix = "patient_"
        query.queryItemIds = set([-2, -5])
        #query.excludeItemIds = set();
        #query.categoryIds = set();
        #query.timeDeltaMax = None;   # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent.  If left blank, will just consider all items within a given patient as co-occurrent.
        query.limit = 3
        # Just get top 3 ranks for simplicity
        query.maxRecommendedId = 0
        # Artificial constraint to focus only on test data

        headers = ["clinical_item_id", "conditionalFreq", "freqRatio"]

        # First query without cache to establish baseline results and query count
        self.recommender.dataManager.dataCache = None
        baselineData = self.recommender(query)
        baselineQueryCount = self.recommender.dataManager.queryCount

        # Redo query with cache enabled
        self.recommender.dataManager.dataCache = dict()
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        # Ensure getting same results
        self.assertNotEqual(baselineQueryCount, newQueryCount)
        # Expect needed more queries since no prior cache
        baselineQueryCount = newQueryCount

        # Again, but should be no new query since have cached results last time
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)

        # Repeat multiple times, should still have no new query activity
        # (was xrange; renamed for Python 3 compatibility)
        for iRepeat in range(10):
            newData = self.recommender(query)
            newQueryCount = self.recommender.dataManager.queryCount
            self.assertEqualRecommendedData(baselineData, newData, query)
            self.assertEqual(baselineQueryCount, newQueryCount)

        # Query for subset should still yield no new query
        query.queryItemIds = set([-2])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData
        # New baseline for subset
        self.assertEqual(baselineQueryCount, newQueryCount)
        # Expect no queries for subsets

        # Repeat query for subset
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
        # Expect no queries for subsets

        # Query for partial subset, partial new
        query.queryItemIds = set([-5, -6])
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        baselineData = newData
        # New baseline for partial subset
        self.assertEqual(baselineQueryCount, newQueryCount)
        # Expect no new queries for subsets, because first query should have done mass-all query

        # Repeat for partial subset, no longer new.
        # Bug fix: previously baselineData was reassigned to newData BEFORE the
        # comparison, making assertEqualRecommendedData compare an object to
        # itself.  Compare against the baseline captured above instead.
        newData = self.recommender(query)
        newQueryCount = self.recommender.dataManager.queryCount
        self.assertEqualRecommendedData(baselineData, newData, query)
        self.assertEqual(baselineQueryCount, newQueryCount)
Пример #16
0
# Collect, per admit diagnosis ID, the set of guideline item IDs referenced,
# plus each distinct (admitDxId, sectionName, guidelineName) combination seen.
admitDxIdSectionGuidelineNameTuples = set()
# Keep track of each guideline name set
itemIdsByAdmitDxId = dict()
for admitDxId, sectionName, guidelineName, itemId, itemName, itemDescription, itemCount in resultsTable:
    if admitDxId not in itemIdsByAdmitDxId:
        itemIdsByAdmitDxId[admitDxId] = set()
    itemIdsByAdmitDxId[admitDxId].add(itemId)
    admitDxIdSectionGuidelineNameTuples.add(
        (admitDxId, sectionName, guidelineName))

recommender = ItemAssociationRecommender()

# Augment each admit diagnosis' item set with the top associated items
# recommended from that diagnosis.
# (was iteritems() / "print >>" Python 2 syntax; converted for Python 3)
for admitDxId, itemIds in itemIdsByAdmitDxId.items():
    print(admitDxId, len(itemIds), file=sys.stderr)  # Progress trace to stderr
    recQuery = RecommenderQuery()
    recQuery.excludeItemIds = recommender.defaultExcludedClinicalItemIds()
    recQuery.excludeCategoryIds = recommender.defaultExcludedClinicalItemCategoryIds(
    )
    recQuery.queryItemIds = [admitDxId]
    recQuery.timeDeltaMax = timedelta(1)
    # Within one day
    recQuery.countPrefix = "patient_"
    recQuery.limit = TOP_ITEM_COUNT

    # Top results by P-value
    recQuery.sortField = "P-YatesChi2-NegLog"
    results = recommender(recQuery)
    #recommender.formatRecommenderResults(results);
    for result in results:
        itemIds.add(result["clinical_item_id"])
Пример #17
0
    def test_recommender(self):
        # Run the recommender against the mock test data above and verify expected stats afterwards.
        """Verify ranked, scored recommender output over the mock test data.

        Exercises a series of RecommenderQuery variations — no query items,
        unknown query items, category exclusions, specific item exclusions,
        multi-item queries, and an alternate sort field — and compares the
        scored results against hand-computed expectations.  Commented-out rows
        in the expected data mark items removed by the active filters.
        """
        query = RecommenderQuery()
        #query.queryItemIds = set();
        #query.excludeItemIds = set();
        #query.categoryIds = set();
        #query.timeDeltaMax = None;   # If set to one of the constants (DELTA_ZERO, DELTA_HOUR, etc.), will count item associations that occurred within that time delta as co-occurrent.  If left blank, will just consider all items within a given patient as co-occurrent.
        query.sortField = "tf"
        query.limit = 16
        # Go ahead and query for all since short list and can get expected calculation results for all
        query.maxRecommendedId = 0
        # Artificial constraint to focus only on test data

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        headers = ["clinical_item_id", "score"]
        # Scores appear to be item count over 13 total items (tf sort) — inferred from the literals below
        expectedData = \
            [   RowItemModel( [-2, 2.0/13], headers ),
                RowItemModel( [-5, 2.0/13], headers ),
                RowItemModel( [-6, 2.0/13], headers ),
                RowItemModel( [-1, 1.0/13], headers ),
                RowItemModel( [-3, 1.0/13], headers ),
                RowItemModel( [-7, 1.0/13], headers ),
                RowItemModel( [-8, 1.0/13], headers ),
                RowItemModel( [-10,1.0/13], headers ),
                RowItemModel( [-11,1.0/13], headers ),
                RowItemModel( [-12,1.0/13], headers ),
                RowItemModel( [-13,1.0/13], headers ),
                RowItemModel( [-14,1.0/13], headers ),
                RowItemModel( [-15,1.0/13], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = set([-100])
        # Same expectations as above: the unknown item contributes no associations
        expectedData = \
            [   RowItemModel( [-2, 2.0/13], headers ),
                RowItemModel( [-5, 2.0/13], headers ),
                RowItemModel( [-6, 2.0/13], headers ),
                RowItemModel( [-1, 1.0/13], headers ),
                RowItemModel( [-3, 1.0/13], headers ),
                RowItemModel( [-7, 1.0/13], headers ),
                RowItemModel( [-8, 1.0/13], headers ),
                RowItemModel( [-10,1.0/13], headers ),
                RowItemModel( [-11,1.0/13], headers ),
                RowItemModel( [-12,1.0/13], headers ),
                RowItemModel( [-13,1.0/13], headers ),
                RowItemModel( [-14,1.0/13], headers ),
                RowItemModel( [-15,1.0/13], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = set([-100])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        expectedData = \
            [   #RowItemModel( [-2, 2.0/13], headers ),
                RowItemModel( [-5, 2.0/13], headers ),
                RowItemModel( [-6, 2.0/13], headers ),
                #RowItemModel( [-1, 1.0/13], headers ),
                #RowItemModel( [-3, 1.0/13], headers ),
                RowItemModel( [-7, 1.0/13], headers ),
                RowItemModel( [-8, 1.0/13], headers ),
                RowItemModel( [-10,1.0/13], headers ),
                RowItemModel( [-11,1.0/13], headers ),
                RowItemModel( [-12,1.0/13], headers ),
                RowItemModel( [-13,1.0/13], headers ),
                #RowItemModel( [-14,1.0/13], headers ),
                #RowItemModel( [-15,1.0/13], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = set([-100])
        query.excludeItemIds = set([-6, -10])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        expectedData = \
            [   #RowItemModel( [-2, 2.0/13], headers ),
                RowItemModel( [-5, 2.0/13], headers ),
                #RowItemModel( [-6, 2.0/13], headers ),
                #RowItemModel( [-1, 1.0/13], headers ),
                #RowItemModel( [-3, 1.0/13], headers ),
                RowItemModel( [-7, 1.0/13], headers ),
                RowItemModel( [-8, 1.0/13], headers ),
                #RowItemModel( [-10,1.0/13], headers ),
                RowItemModel( [-11,1.0/13], headers ),
                RowItemModel( [-12,1.0/13], headers ),
                RowItemModel( [-13,1.0/13], headers ),
                #RowItemModel( [-14,1.0/13], headers ),
                #RowItemModel( [-15,1.0/13], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        # Scores now combine per-query-item conditional contributions — see literals
        expectedData = \
            [   RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ),

                RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ),
                RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ),
                RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ),

                RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ),
                RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ),

                RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("General query with category limit")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set([-2, -4, -5, -6])
        expectedData = \
            [   #RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ),

                RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ),
                #RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ),
                #RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ),

                #RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ),

                RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("General query with specific exclusion")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set([-4, -3, -2])
        query.excludeCategoryIds = set()
        expectedData = \
            [   RowItemModel( [-6, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-5, (1.0/6)*(2.0/2)+(1.0/4)*(1.0/2)], headers ),
                #RowItemModel( [-2, (1.0/6)*(1.0/2)+(1.0/6)*(2.0/2)], headers ),

                #RowItemModel( [-3, (1.0/6)*(2.0/2)], headers ),
                RowItemModel( [-7, (1.0/6)*(2.0/2)], headers ),
                RowItemModel( [-8, (1.0/6)*(2.0/2)], headers ),

                RowItemModel( [-14,(1.0/4)*(1.0/2)], headers ),
                RowItemModel( [-15,(1.0/4)*(1.0/2)], headers ),

                RowItemModel( [-1, (1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-10,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-11,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-12,(1.0/6)*(1.0/2)], headers ),
                RowItemModel( [-13,(1.0/6)*(1.0/2)], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug("General query, sort by TF*IDF lift.")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.sortField = "lift"
        # Lift scores scale the tf score by (13 / item count) — inferred from the literals
        expectedData = \
            [   #RowItemModel( [-5, (13.0/2)*((1.0/6)*(2.0/2)+(1.0/4)*(1.0/2))], headers ),
                #RowItemModel( [-2, (13.0/2)*((1.0/6)*(1.0/2)+(1.0/6)*(2.0/2))], headers ),

                RowItemModel( [-3, (13.0/1)*((1.0/6)*(2.0/2))], headers ),
                RowItemModel( [-7, (13.0/1)*((1.0/6)*(2.0/2))], headers ),
                RowItemModel( [-8, (13.0/1)*((1.0/6)*(2.0/2))], headers ),

                RowItemModel( [-6, (13.0/2)*((1.0/6)*(2.0/2)+(1.0/4)*(1.0/2))], headers ),

                RowItemModel( [-14,(13.0/1)*((1.0/4)*(1.0/2))], headers ),
                RowItemModel( [-15,(13.0/1)*((1.0/4)*(1.0/2))], headers ),

                RowItemModel( [-1, (13.0/1)*((1.0/6)*(1.0/2))], headers ),
                RowItemModel( [-10,(13.0/1)*((1.0/6)*(1.0/2))], headers ),
                RowItemModel( [-11,(13.0/1)*((1.0/6)*(1.0/2))], headers ),
                RowItemModel( [-12,(13.0/1)*((1.0/6)*(1.0/2))], headers ),
                RowItemModel( [-13,(13.0/1)*((1.0/6)*(1.0/2))], headers ),
            ]
        recommendedData = self.recommender(query)
        self.assertEqualRecommendedData(expectedData, recommendedData, query)
Пример #18
0
    def test_recommender(self):
        """Run the recommender against the mock test data and verify the ranked results.

        Each scenario adjusts the query filters (unknown query items, category
        exclusions, specific item exclusions, time-delta limits) and checks the
        top-ranked clinical item IDs returned.
        """
        query = RecommenderQuery()
        # queryItemIds / excludeItemIds / categoryIds / timeDeltaMax left at defaults initially
        query.limit = 3  # Just the top 3 ranks for simplicity
        query.maxRecommendedId = 0  # Artificial constraint to focus only on test data

        headers = ["clinical_item_id"]

        def verifyTopItems(expectedItemIds):
            # Run the recommender with the current query state and compare
            # the ranked item IDs against the expected ordering.
            expectedData = [RowItemModel([itemId], headers) for itemId in expectedItemIds]
            recommendedData = self.recommender(query)
            self.assertEqualRecommendedData(expectedData, recommendedData, query)

        log.debug(
            "Query with no item key input, just return ranks by general likelihood then."
        )
        verifyTopItems([-3, -6, -5])

        log.debug(
            "Query with key item inputs for which no data exists.  Effecitvely ignore it then, so just return ranks by general likelihood."
        )
        query.queryItemIds = set([-100])
        verifyTopItems([-3, -6, -5])

        log.debug("Query with category filter on recommended results.")
        query.queryItemIds = set([-100])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        verifyTopItems([-6, -5])

        log.debug(
            "Query with category filter and specific exclusion filter on recommended results."
        )
        query.queryItemIds = set([-100])
        query.excludeItemIds = set([-6])
        query.excludeCategoryIds = set([-1, -4, -5, -6])
        verifyTopItems([-5])

        log.debug(
            "General query with a couple of input clinical items + one with no association data (should effectively be ignored)."
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        verifyTopItems([-4, -6])

        log.debug(
            "General query but set a limit on time delta worth counting item associations"
        )
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        verifyTopItems([-6, -4])

        log.debug("General query with category limit")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set()
        query.excludeCategoryIds = set([-2, -4, -5, -6])
        query.timeDeltaMax = DELTA_HOUR
        verifyTopItems([-4])

        log.debug("General query with specific exclusion")
        query.queryItemIds = set([-2, -5, -100])
        query.excludeItemIds = set([-4, -3, -2])
        query.excludeCategoryIds = set()
        query.timeDeltaMax = DELTA_HOUR
        verifyTopItems([-6])
Пример #19
0
    def main(self, argv):
        """Main method, callable from command line.

        Parses options, reads a prepared patient-item validation file, runs
        the OrderSetRecommender-based analysis against it, and writes the
        result rows (preceded by a comment line recording argv) to the
        output file (or stdout if none given).
        """
        usageStr =  "usage: %prog [options] <inputFile> [<outputFile>]\n"+\
                    "   <inputFile>    Validation file in prepared result file format.  Predict items and compare against verify sets similar to RecommendationClassficationAnalysis. \n"+\
                    "   <outputFile>   Validation result stat summaries.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-X",
            "--excludeCategoryIds",
            dest="excludeCategoryIds",
            help=
            "For recommendation, exclude / skip any items who fall under one of these comma-separated category Ids."
        )
        parser.add_option(
            "-s",
            "--sortField",
            dest="sortField",
            default=DEFAULT_SORT_FIELD,
            help=
            "Score field to sort top recommendations by.  Default to posterior probabilty / positive predictive value 'P(B|A)', but can also select 'lift' = 'tfidf' = 'interest' for TF*IDF style score weighting."
        )
        parser.add_option(
            "-r",
            "--numRecs",
            dest="numRecs",
            default=DEFAULT_RECOMMENDED_ITEM_COUNT,
            help=
            "Number of orders / items to recommend for comparison against the verification set."
        )
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) >= 1:
            query = AnalysisQuery()
            query.preparedPatientItemFile = stdOpen(args[0])
            query.recommender = OrderSetRecommender()
            query.baseRecQuery = RecommenderQuery()
            if options.excludeCategoryIds is not None:
                query.baseRecQuery.excludeCategoryIds = set()
                # Bug fix: was options.executeCategoryIds, which raised
                # AttributeError whenever -X / --excludeCategoryIds was given
                for categoryIdStr in options.excludeCategoryIds.split(","):
                    query.baseRecQuery.excludeCategoryIds.add(
                        int(categoryIdStr))
            else:  # Default exclusions if none specified
                query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds(
                )
                query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds(
                )

            query.baseRecQuery.sortField = options.sortField
            query.numRecommendations = int(options.numRecs)

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            summaryData = {
                "argv": argv
            }
            print(COMMENT_TAG, json.dumps(summaryData), file=outputFile)

            formatter = TextResultsFormatter(outputFile)
            colNames = self.resultHeaders(query)
            formatter.formatTuple(colNames)
            # Insert a mock record to get a header / label row
            formatter.formatResultDicts(analysisResults, colNames)
        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Пример #20
0
    def test_analysisPreparation(self):
        """Run the analysis preparer against the mock test data and verify
        the prepared patient-item rows under several query configurations:
        no time limit, a 1-week outcome time limit, inclusion of historical
        (demographic) items, and date-level (vs. time-level) precision data.
        """

        def runCliAndVerify(cliArgv, expected, cols):
            # Re-run the same scenario through the command-line interface,
            # capturing stdout and comparing the text output to the
            # expected result rows.
            sys.stdout = StringIO()
            self.analyzer.main(cliArgv)
            self.assertEqualTextOutput(
                expected, StringIO(sys.stdout.getvalue()), cols)

        analysisQuery = AnalysisQuery()
        analysisQuery.patientIds = set([-11111])
        analysisQuery.baseCategoryId = -7
        analysisQuery.queryTimeSpan = timedelta(0, 86400)
        analysisQuery.verifyTimeSpan = timedelta(0, 604800)
        analysisQuery.baseRecQuery = RecommenderQuery()
        analysisQuery.baseRecQuery.targetItemIds = set([-33, -32, -31, -30])
        analysisQuery.baseRecQuery.excludeItemIds = [-13]
        analysisQuery.baseRecQuery.excludeCategoryIds = [-5]
        analysisQuery.baseRecQuery.maxRecommendedId = 0  # Restrict to test data

        # Initial run without time limits on outcome measure
        cols = [
            "patient_id", "baseItemId", "queryItemCountById",
            "verifyItemCountById", "outcome.-33", "outcome.-32",
            "outcome.-31", "outcome.-30",
        ]
        expected = [
            RowItemModel(
                [-11111, -21,
                 {-4: 2, -10: 1, -8: 1, -32: 1},
                 {-30: 1},
                 0, 2, 1, 1],
                cols)
        ]
        self.assertEqualResultDicts(
            expected, list(self.analyzer(analysisQuery)), cols)
        # Redo but run through command-line interface
        runCliAndVerify(
            ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V",
             "604800", "-o", "-33,-32,-31,-30", '0,-11111', "-"],
            expected, cols)

        # Now try with a 1 week time limitation on the outcome measure
        analysisQuery.baseRecQuery.timeDeltaMax = timedelta(0, 604800)
        cols = [
            "patient_id", "queryItemCountById", "verifyItemCountById",
            "outcome.-33", "outcome.-32", "outcome.-30",
        ]
        expected = [
            RowItemModel(
                [-11111,
                 {-4: 2, -10: 1, -8: 1, -32: 1},
                 {-30: 1},
                 0, 2, 1],
                cols)
        ]
        self.assertEqualResultDicts(
            expected, list(self.analyzer(analysisQuery)), cols)
        runCliAndVerify(
            ["PreparePatientItems.py", "-c", "-7", "-Q", "86400", "-V",
             "604800", "-t", "604800", "-o", "-33,-32,-31,-30",
             '0,-11111', "-"],
            expected, cols)

        # Now include all historical (demographic) data items
        analysisQuery.pastCategoryIds = [-8]
        cols = [
            "patient_id", "queryItemCountById", "verifyItemCountById",
            "outcome.-33", "outcome.-32", "outcome.-30",
        ]
        expected = [
            RowItemModel(
                [-11111,
                 {-43: 1, -45: 1, -41: 1, -46: 1, -4: 2, -10: 1, -8: 1,
                  -32: 1},
                 {-30: 1},
                 0, 2, 1],
                cols)
        ]
        self.assertEqualResultDicts(
            expected, list(self.analyzer(analysisQuery)), cols)
        runCliAndVerify(
            ["PreparePatientItems.py", "-p", "-8", "-c", "-7", "-Q", "86400",
             "-V", "604800", "-t", "604800", "-o", "-33,-32,-31,-30",
             '0,-11111', "-"],
            expected, cols)

        # Different search where items are recorded with date level instead
        # of time level precision.  Note the verify time threshold of
        # (1 day + 1 second) so we just capture the next day of data
        # instead of just missing it.
        cols = [
            "patient_id", "baseItemId", "queryItemCountById",
            "verifyItemCountById", "outcome.-33", "outcome.-32",
            "outcome.-31", "outcome.-30",
        ]
        expected = [
            RowItemModel(
                [-44444, -21,
                 {-6: 1, -12: 1},
                 {-11: 1, -8: 1},
                 0, 0, 0, 0],
                cols)
        ]
        runCliAndVerify(
            ["PreparePatientItems.py", "-c", "-7", "-Q", "14400", "-V",
             "86401", "-o", "-33,-32,-31,-30", '0,-44444', "-"],
            expected, cols)

        # Include background demographics category
        expected = [
            RowItemModel(
                [-44444, -21,
                 {-43: 1, -45: 1, -42: 1, -46: 1, -6: 1, -12: 1},
                 {-11: 1, -8: 1},
                 0, 0, 0, 0],
                cols)
        ]
        runCliAndVerify(
            ["PreparePatientItems.py", "-p", "-8", "-c", "-7", "-Q", "14400",
             "-V", "86401", "-o", "-33,-32,-31,-30", '0,-44444', "-"],
            expected, cols)
Пример #21
0
# NOTE(review): scraped top-level script fragment.  For each diagnosis listed
# in a CSV, it queries ItemAssociationRecommender for top associated clinical
# items and opens a per-diagnosis output CSV.  "rU" (universal-newline) mode
# was removed in Python 3 -- TODO confirm intended Python version.  Neither
# this handle nor the per-diagnosis output files are ever closed; consider
# "with" blocks.
diagnoses = open('/Users/jwang/Desktop/Results/diagnoses_to_test.csv', "rU")
diagnoses.readline()  # Skip the header row

# Fixed query parameters appended to every per-diagnosis query string
# (exclusions, sort order, filters, result count, aggregation method).
baseQueryStr = "&targetItemIds=&excludeItemIds=71052,71046,71054,71083,71045,71047&excludeCategoryIds=1,58,4,2,160,161,59,13,159,163,23,62,18,11,46,2&timeDeltaMax=86400&sortField=P-YatesChi2-NegLog&sortReverse=True&filterField1=prevalence<:&filterField2=PPV<:&filterField3=RR<:&filterField4=sensitivity<:&filterField5=P-YatesChi2<:&resultCount=4000&invertQuery=false&showCounts=true&countPrefix=patient_&aggregationMethod=weighted&cacheTime=0"
recommender = ItemAssociationRecommender()

diagnosis_count = 0  # NOTE(review): never incremented within this fragment
for line in diagnoses:
    line = line.strip().split(",")
    clinical_item_id = line[0]  # First column: clinical item id
    description = " ".join(line[1:])  # Remaining columns form the description
    queryStr = "queryItemIds=" + str(clinical_item_id) + baseQueryStr
    print('Finding Top Associations for "{0}"'.format(description))

    # Build RecommenderQuery from the URL-style parameter string
    query = RecommenderQuery()
    paramDict = dict(urlparse.parse_qsl(queryStr, True))  # True: keep blank values
    query.parseParams(paramDict)

    # Call ItemRecommender
    recommendations = recommender(query)

    # Output to csv file
    description = description.replace("/", ";")  # "/" would break the filename
    fname = str(clinical_item_id) + " " + str(description) + ".csv"
    outfname = open(
        "/Users/jwang/Desktop/Results/item_associations_expert_unmatched/" +
        fname, "w")
    outfname.write(
        "clinical_item_id,description,score,PPV,OR,prevalence,RR,P-YatesChi2\n"
    )
    # NOTE(review): fragment ends here -- the recommendation rows themselves
    # are presumably written further down in the original script (truncated
    # by the scrape).
    def main(self, argv):
        """Main method, callable from command line.

        Parses command-line options into an AnalysisQuery, runs the serial
        recommendation analysis (via self(query)), and writes the tabular
        results to the named output file (or stdout for "-").

        Fixed here: the body was legacy Python 2 (`print >> outputFile`,
        statement semicolons); ported to Python 3 to match the rest of the
        file, with behavior otherwise unchanged.
        """
        usageStr = ("usage: %prog [options] <patientIds> [<outputFile>]\n"
                    "   <patientIds>    Patient ID file or Comma-separated list of test Patient IDs to run analysis against\n"
                    "   <outputFile>    If query yields a result set, then that will be output\n"
                    "                       to the named file.  Leave blank or specify \"-\" to send to stdout.\n")
        parser = OptionParser(usage=usageStr)
        parser.add_option("-R", "--recommender",  dest="recommender",  help="Name of the recommender to run the analysis against.  Options: %s" % RECOMMENDER_CLASS_BY_NAME.keys())
        parser.add_option("-s", "--sortField",  dest="sortField",  help="Allow overriding of default sort field when returning ranked results")
        parser.add_option("-f", "--fieldFilters",  dest="fieldFilters",  help="Filters to exclude results.  Comma-separated separated list of field-op:value exclusions where op is either < or > like, conditionalFreq<:0.1,frqeRatio<:1")
        parser.add_option("-t", "--timeDeltaMax",  dest="timeDeltaMax",  help="If set, represents a time delta in seconds maximum by which recommendations should be based on.  Defaults to recommending items that occur at ANY time after the key orders.  If provided, will apply limits to only orders placed within 0 seconds, 1 hour (3600), 1 day (86400), or 1 week (604800) of the key orders / items.")
        parser.add_option("-a", "--aggregationMethod",  dest="aggregationMethod",  help="Aggregation method to use for recommendations based off multiple query items.  Options: %s." % list(AGGREGATOR_OPTIONS) )
        parser.add_option("-p", "--countPrefix",  dest="countPrefix",  help="Prefix for how to do counts.  Blank for default item counting allowing repeats, otherwise ignore repeats for patient_ or encounter_")
        parser.add_option("-q", "--queryItemMax",  dest="queryItemMax",  help="If set, specifies a maximum number of query items to use when analyzing serial recommendations.  Will stop analyzing further for a patient once reach this limit.")
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) > 1:
            # Parse out the query parameters
            query = AnalysisQuery()
            query.recommender = RECOMMENDER_CLASS_BY_NAME[options.recommender]()
            query.recommender.dataManager.dataCache = dict()  # Use a local cache to speed up repeat queries

            patientIdsParam = args[0]
            try:
                # Try to open patient IDs as a file
                patientIdFile = stdOpen(patientIdsParam)
                query.patientIds = set(patientIdFile.read().split())
            except IOError:
                # Unable to open as a filename, then interpret as simple comma-separated list
                query.patientIds = set(patientIdsParam.split(","))

            query.baseRecQuery = RecommenderQuery()
            query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds()
            query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds()
            if options.sortField is not None:
                query.baseRecQuery.sortField = options.sortField
            if options.fieldFilters is not None:
                for fieldFilterStr in options.fieldFilters.split(","):
                    (fieldOp, valueStr) = fieldFilterStr.split(":")
                    query.baseRecQuery.fieldFilters[fieldOp] = float(valueStr)
            if options.timeDeltaMax is not None and len(options.timeDeltaMax) > 0:
                query.baseRecQuery.timeDeltaMax = timedelta(0, int(options.timeDeltaMax))
            if options.aggregationMethod is not None:
                query.baseRecQuery.aggregationMethod = options.aggregationMethod
            if options.countPrefix is not None:
                query.baseRecQuery.countPrefix = options.countPrefix

            if options.queryItemMax is not None:
                query.queryItemMax = int(options.queryItemMax)

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            # (was Python 2 `print >> outputFile, "#", argv` -- a syntax error in Python 3)
            print("#", argv, file=outputFile)

            colNames = ["patientId", "clinicalItemId", "iItem", "iRecItem", "recRank", "recScore"]
            analysisResults.insert(0, colNames)  # Insert a mock record to get a header / label row

            formatter = TextResultsFormatter(outputFile)
            formatter.formatResultSet(analysisResults)

        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)