Example #1
    def _getAdmitDateRange(self):
        # Get list of all clinical item IDs matching admit diagnosis.
        # Get this list in advance to make subsequent query run a bit faster.
        admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

        # Build query for earliest and latest admissions.
        # SELECT
        #   MIN(item_date) AS first_admit_date,
        #   MAX(item_date) AS last_admit_date,
        # FROM
        #   patient_item
        # WHERE
        #   clinical_item_id in (admitDxClinicalItemIds)
        query = SQLQuery()
        query.addSelect("MIN(item_date) AS first_admit_date")
        query.addSelect("MAX(item_date) AS last_admit_date")
        query.addFrom("patient_item")
        query.addWhereIn("clinical_item_id", admitDxClinicalItemIds)

        # Execute query and return results.
        results = self._executeCachedDBQuery(query)
        firstAdmitDate = DBUtil.parseDateValue(results[0][0]).date()
        lastAdmitDate = DBUtil.parseDateValue(results[0][1]).date()

        return firstAdmitDate, lastAdmitDate
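
For comparison, a hedged sketch of the same query issued directly through the DB-API rather than the SQLQuery builder (the conn object and the "?" parameter style are assumptions, not the repo's API):

# Direct DB-API equivalent of the SQLQuery built above (a sketch).
placeholders = ",".join(["?"] * len(admitDxClinicalItemIds))
sql = (
    "SELECT MIN(item_date) AS first_admit_date, "
    "MAX(item_date) AS last_admit_date "
    "FROM patient_item "
    "WHERE clinical_item_id IN (%s)" % placeholders
)
row = conn.execute(sql, list(admitDxClinicalItemIds)).fetchone()
firstAdmitDate, lastAdmitDate = row[0], row[1]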
Example #2
def parseLabResultsFile(labFile):
    log.info("Parse lab results file");
    prog = ProgressDots();
    labsByBaseNameByPatientId = dict(); # Dictionary of dictionaries of lists of result items
    for labResult in TabDictReader(labFile):
        if labResult["ord_num_value"] is not None and labResult["ord_num_value"] != NULL_STRING:
            patientId = int(labResult["pat_id"]);
            labBaseName = labResult["base_name"];
            resultValue = float(labResult["ord_num_value"]);
            resultTime = DBUtil.parseDateValue(labResult["result_time"]);

            if resultValue < LAB_SENTINEL_VALUE:    # Skip apparent placeholder values
                labResult["pat_id"] = labResult["patient_id"] = patientId;
                labResult["ord_num_value"] = resultValue;
                labResult["result_time"] = resultTime;

                if patientId not in labsByBaseNameByPatientId:
                    labsByBaseNameByPatientId[patientId] = dict();
                if labBaseName not in labsByBaseNameByPatientId[patientId]:
                    labsByBaseNameByPatientId[patientId][labBaseName] = list();
                labsByBaseNameByPatientId[patientId][labBaseName].append( labResult );

        prog.update();
    prog.printStatus();
    return labsByBaseNameByPatientId;
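
A hedged usage sketch of the parser above. The tab-separated layout is inferred from the column names the function reads (pat_id, base_name, ord_num_value, result_time); the 9999999 sentinel is an assumption based on the test data elsewhere on this page:

# Hypothetical usage; the real inputs come from the medinfo pipeline.
from io import StringIO

labFile = StringIO(
    "pat_id\tbase_name\tord_num_value\tresult_time\n"
    "-123\tNA\t140\t4/6/2009 14:00\n"
    "-123\tNA\t9999999\t4/6/2009 15:00\n"  # apparent sentinel row, skipped
)
labsByBaseNameByPatientId = parseLabResultsFile(labFile)
# Expected shape: {-123: {"NA": [<row dict with parsed values>]}}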
Example #3
    def _insertUMichTestRecords(self):
        db_name = medinfo.db.Env.DB_PARAM['DSN']
        db_path = medinfo.db.Env.DB_PARAM['DATAPATH']
        conn = sqlite3.connect(db_path + '/' + db_name)

        table_names = ['labs', 'pt_info', 'demographics', 'encounters', 'diagnoses']

        for table_name in table_names:
            columns = FMTU.FM_TEST_INPUT_TABLES["%s_columns"%table_name]
            column_types = FMTU.FM_TEST_INPUT_TABLES["%s_column_types"%table_name]

            df = pd.DataFrame()
            for one_line in FMTU.FM_TEST_INPUT_TABLES['%s_data'%table_name]:
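                # NOTE: DataFrame.append was deprecated in pandas 1.4 and removed
                # in 2.0; pd.DataFrame(list_of_row_dicts, columns=columns) builds
                # the same frame in one call.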
                df = df.append(dict(zip(columns, one_line)), ignore_index=True)

            df.to_sql(table_name, conn, if_exists="append", index=False)

        # First, write basic (pat_id, order_time) episode information to a temp file.
        # All subsequent [[pat_id, event_time]] operations are then based on these
        # episodes, i.e., every pat_id comes from this set of pat_ids.

        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS INTEGER) AS pat_id")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addFrom("labs")
        self.cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        self.factory.setPatientEpisodeInput(self.cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
            resultPatientEpisodes.append(episode)
Example #4
    def test_buildFeatureMatrix_prePostFeatures(self):
        """
        Test features parameter in addClinicalItemFeatures which allows
        client to specify they only want .pre* or .post* columns in feature
        matrix.
        """
        # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
        # has not been set.
        with self.assertRaises(ValueError):
            self.factory.processPatientEpisodeInput()

        # Initialize DB cursor.
        cursor = self.connection.cursor()

        # Build SQL query for list of patient episodes.
        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
        patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
        patientEpisodeQuery.addSelect("proc_code")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addSelect(
            "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
        )
        patientEpisodeQuery.addFrom("stride_order_proc AS sop")
        patientEpisodeQuery.addFrom("stride_order_results AS sor")
        patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
        patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
        patientEpisodeQuery.addGroupBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        patientEpisodeQuery.addOrderBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        # Set and process patientEpisodeInput.
        self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(
                episode["order_time"])
            resultPatientEpisodes.append(episode)

        # Add TestItem100 and TestItem200 clinical item data.
        self.factory.addClinicalItemFeatures(["TestItem100"], features="pre")
        self.factory.addClinicalItemFeatures(["TestItem200"], features="post")
        self.factory.buildFeatureMatrix()
        resultMatrix = self.factory.readFeatureMatrixFile()
        expectedMatrix = FM_TEST_OUTPUT[
            "test_buildFeatureMatrix_prePostFeatures"]

        self.assertEqualList(resultMatrix[2:], expectedMatrix)
Example #5
def parseClinicalItemFile(itemFile, patientIdCol="patient_id", timeCol="item_date"):
    prog = ProgressDots();
    itemTimesByPatientId = dict();
    for itemData in TabDictReader(itemFile):
        patientId = int(itemData[patientIdCol]);
        itemTime = DBUtil.parseDateValue(itemData[timeCol]);

        itemData[patientIdCol] = patientId;
        itemData[timeCol] = itemTime;

        if patientId not in itemTimesByPatientId:
            itemTimesByPatientId[patientId] = list();
        itemTimesByPatientId[patientId].append( itemTime );

        prog.update();
    prog.printStatus();

    return itemTimesByPatientId;
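
The nested dict accumulation above could equivalently use collections.defaultdict; a minimal sketch (not the repo's code):

from collections import defaultdict

def groupItemTimesByPatient(rows, patientIdCol="patient_id", timeCol="item_date"):
    # Same grouping as parseClinicalItemFile, minus the file parsing,
    # value conversion, and progress reporting.
    itemTimesByPatientId = defaultdict(list)
    for itemData in rows:
        itemTimesByPatientId[int(itemData[patientIdCol])].append(itemData[timeCol])
    return dict(itemTimesByPatientId)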
Example #6
    def test_dataConversion(self):
        # Run the data conversion on the same data and look for expected records
        log.debug("Run the conversion process...")
        self.converter.convertSourceItems(TEST_START_DATE)

        # Just query back for the same data, de-normalizing the data back to a general table
        testQuery = \
            """
            select 
                pi.external_id,
                pi.patient_id,
                pi.encounter_id,
                cic.description,
                ci.external_id,
                ci.name,
                ci.description,
                pi.num_value,
                pi.text_value,
                pi.item_date
            from
                patient_item as pi,
                clinical_item as ci,
                clinical_item_category as cic
            where
                pi.clinical_item_id = ci.clinical_item_id and
                ci.clinical_item_category_id = cic.clinical_item_category_id and
                cic.source_table = 'stride_order_results'
            order by
                pi.external_id desc, ci.name
            """
        expectedData = \
            [
                [-1748206, -1099, -9890, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 50, None, DBUtil.parseDateValue('7/3/2111 14:21'),],
                [-2658433, -6894, -211, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 45, None, DBUtil.parseDateValue('7/5/2111 0:28'),],
                [-2794591, -4038, -6687, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 70, None, DBUtil.parseDateValue('3/19/2113 19:26'),],
                [-3347071, -6139, -7104, 'Lab Result', None, '25OHD3(Low)', '25-HYDROXY D3 (Low)', 2, None, DBUtil.parseDateValue('9/8/2113 22:10'),],
                [-3393444, -5157, -5537, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 65, None, DBUtil.parseDateValue('10/9/2113 5:03'),],
                [-3580354, -2795, -752, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 49, None, DBUtil.parseDateValue('12/17/2113 0:40'),],
                [-4464954, -4591, -1383, 'Lab Result', None, '25OHD3(InRange)', '25-HYDROXY D3 (InRange)', 55, None, DBUtil.parseDateValue('5/28/2113 23:28'),],
                [-19007449, -9542, -4105, 'Lab Result', None, 'NA(High)', 'SODIUM, SER/PLAS (High)', 157, None, DBUtil.parseDateValue('9/13/2109 11:55'),],
                [-19231504, -1518, -3744, 'Lab Result', None, 'NA(Low)', 'SODIUM, SER/PLAS (Low)', 134, None, DBUtil.parseDateValue('8/20/2109 12:22'),],
                [-21479311, -9844, -5135, 'Lab Result', None, 'NA(InRange)', 'SODIUM, SER/PLAS (InRange)', 142, None, DBUtil.parseDateValue('8/31/2109 15:42'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'HCT(LowPanic)', 'HEMATOCRIT(HCT) (Low Panic)', 19.7, None, DBUtil.parseDateValue('11/29/2111 14:36'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'HGB(LowPanic)', 'HEMOGLOBIN(HGB) (Low Panic)', 7, None, DBUtil.parseDateValue('11/30/2111 7:36'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'MCH(InRange)', 'MCH(MCH) (InRange)', 31.7, None, DBUtil.parseDateValue('10/17/2112 1:09'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'MCHC(InRange)', 'MCHC(MCHC) (InRange)', 35.4, None, DBUtil.parseDateValue('12/13/2112 2:54'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'MCV(InRange)', 'MCV(MCV) (InRange)', 89.7, None, DBUtil.parseDateValue('11/11/2112 2:54'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'PLT(Low)', 'PLATELET COUNT(PLT) (Low)', 11, None, DBUtil.parseDateValue('1/30/2113 13:28'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'RBC(Low)', 'RBC(RBC) (Low)', 2.2, None, DBUtil.parseDateValue('7/11/2113 23:24'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'RDW(High)', 'RDW(RDW) (High)', 33.3, None, DBUtil.parseDateValue('1/27/2113 14:44'),],
                [-22793877, -3261, -4837, 'Lab Result', None, 'WBC(LowPanic)', 'WBC(WBC) (Low Panic)', 0.2, None, DBUtil.parseDateValue('9/25/2109 16:10'),],
                [-22910018, -1862, -621, 'Lab Result', None, 'MG(InRange)', 'MAGNESIUM, SER/PLAS(MGN) (InRange)', 2.1, None, DBUtil.parseDateValue('11/13/2112 8:18'),],
                [-29501223, -9860, -1772, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 5.1, None, DBUtil.parseDateValue('11/29/2111 0:15'),],
                [-29966444, -5690, -1150, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 4.2, None, DBUtil.parseDateValue('11/29/2111 2:27'),],
                [-30560253, -7803, -1772, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 2.6, None, DBUtil.parseDateValue('11/29/2111 16:13'),],
                [-31237072, -124, -8391, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 50.6, None, DBUtil.parseDateValue('11/29/2111 5:45'),],
                [-31300455, -2168, -261, 'Lab Result', None, 'ACETA(High)', 'ACETAMINOPHEN(ACETA) (High)', 270.7, None, DBUtil.parseDateValue('11/29/2111 18:58'),],
                [-31823670, -2130, -3897, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 5.4, None, DBUtil.parseDateValue('11/29/2111 14:08'),],
                [-33197720, -9926, -4898, 'Lab Result', None, 'ACETA(Result)', 'ACETAMINOPHEN(ACETA) (Result)', None, None, DBUtil.parseDateValue('11/29/2111 15:22'),],
                [-33280031, -3858, -6463, 'Lab Result', None, 'ACETA(Result)', 'ACETAMINOPHEN(ACETA) (Result)', 9999999, None, DBUtil.parseDateValue('11/29/2111 7:41'),],
                [-33765278, -4255, -622, 'Lab Result', None, '9374R(Result)', 'INTERPRETATION/ COMMENTS CLASS II 9374R (Result)', 9999999, None, DBUtil.parseDateValue('9/22/2112 20:26'),],
                [-35954787, -7074, -6965, 'Lab Result', None, 'GTP53(Result)', 'TP53(GTP53) (Result)', 9999999, None, DBUtil.parseDateValue('8/19/2109 16:39'),],
                [-36668349, -9815, -3658, 'Lab Result', None, 'ACETA(InRange)', 'ACETAMINOPHEN(ACETA) (InRange)', 7.7, None, DBUtil.parseDateValue('10/30/2111 7:23'),],
                [-38543619, -6562, -4489, 'Lab Result', None, 'GTP53(Result)', 'TP53(GTP53) (Result)', 9999999, None, DBUtil.parseDateValue('10/23/2109 14:30'),],
                [-39004110, -5750, -4953, 'Lab Result', None, 'YLEPT1(InRange)', 'LEPTIN (InRange)', 20, None, DBUtil.parseDateValue('8/26/2112 15:07'),],
                [-40604146, -7480, -8730, 'Lab Result', None, '9374R(Result)', 'INTERPRETATION/ COMMENTS CLASS II 9374R (Result)', None, None, DBUtil.parseDateValue('12/13/2111 18:12'),],
            ]
        actualData = DBUtil.execute(testQuery)
        self.assertEqualTable(expectedData, actualData)

        # Query back for stat data
        testQuery = \
            """
            select 
                base_name,
                max_result_flag,
                max_result_in_range
            from
                order_result_stat
            where
                base_name not like 'PreTest_%%'
            order by
                base_name
            """
        # Don't necessarily expect stats for all items if a usable result_flag, result_in_range_yn, or sentinel result value is always available
        expectedData = \
            [
                ["25OHD3",None,None],
                #["9374R",None,None],
                ["ACETA",None,None],
                #["GTP53",None,None],
                #["HCT","Low Panic",None],
                #["HGB","Low Panic",None],
                #["MCH",None,"Y"],
                #["MCHC",None,"Y"],
                #["MCV",None,"Y"],
                #["MG",None,"Y"],
                ["NA","Low",None],
                #["PLT","Low",None],
                #["RBC","Low",None],
                #["RDW","High",None],
                #["WBC","Low Panic",None],
                ["YLEPT1",None,None],
            ]
        actualData = DBUtil.execute(testQuery)
        self.assertEqualTable(expectedData, actualData)
Example #7
    def main(self, argv):
        """Main method, callable from command line"""
        usageStr =  "usage: %prog [options] <patientIds/dataFile> [<outputFile>]\n"+\
                    "   <patientIds/dataFile>    Name of file with patient ids.  If not found, then interpret as comma-separated list of test Patient IDs to prepare analysis data for.  Alternatively, provide preparedPatientItemFile generated from PreparePatientItems as input.\n"+\
                    "   <outputFile>    If query yields a result set, then that will be output\n"+\
                    "                       to the named file.  Leave blank or specify \"-\" to send to stdout.\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-q",
            "--numQuery",
            dest="numQuery",
            help=
            "Number of orders / items from each patient to use as query items to prime the recommendations.  If set to a float number in (0,1), then treat as a percentage of the patient's total orders / items"
        )
        parser.add_option(
            "-v",
            "--numVerify",
            dest="numVerify",
            help=
            "Number of orders / items from each patient after the query items to use to validate recommendations.  If set to a float number in (0,1), then treat as a percentage of the patient's total orders / items.  If left unset, then just use all remaining orders / items for that patient"
        )
        parser.add_option(
            "-c",
            "--baseCategoryId",
            dest="baseCategoryId",
            help=
            "Instead of specifying first nQ query items, specify ID of clinical item category to look for initial items from (probably the ADMIT Dx item)."
        )
        parser.add_option(
            "-b",
            "--baseItemId",
            dest="baseItemId",
            help=
            "Instead of specifying first nQ query items, specify ID of the specific clinical item to look for initial items from."
        )
        parser.add_option(
            "-S",
            "--startDate",
            dest="startDate",
            help="Only look for test data occuring on or after this start date."
        )
        parser.add_option(
            "-E",
            "--endDate",
            dest="endDate",
            help="Only look for test data occuring before this end date.")
        parser.add_option(
            "-Q",
            "--queryTimeSpan",
            dest="queryTimeSpan",
            help=
            "Time frame specified in seconds over which to look for initial query items (e.g., 24hrs = 86400) after the base item found from the category above.  Start the time counting from the first item time occuring after the category item above since the ADMIT Dx items are often keyed to dates only without times (defaulting to midnight of the date specified)."
        )
        parser.add_option(
            "-V",
            "--verifyTimeSpan",
            dest="verifyTimeSpan",
            help=
            "Time frame specified in seconds over which to look for verify items after initial query item time.  Will ignore the query items that occur within the queryTimeSpan."
        )

        parser.add_option(
            "-P",
            "--preparedPatientItemFile",
            dest="preparedPatientItemFile",
            action="store_true",
            help=
            "If set, will expect primary argument to instead be name of file to read input data from, instead of using above parameters to query from database."
        )

        parser.add_option(
            "-R",
            "--recommender",
            dest="recommender",
            help=
            "Name of the recommender to run the analysis against.  Options: %s"
            % list(RECOMMENDER_CLASS_BY_NAME.keys()))
        parser.add_option(
            "-r",
            "--numRecs",
            dest="numRecs",
            help=
            "Number of orders / items to recommend for comparison against the verification set. Alternative set option numRecsByOrderSet to look for key order set usage and size."
        )
        parser.add_option(
            "-O",
            "--numRecsByOrderSet",
            dest="numRecsByOrderSet",
            action="store_true",
            help=
            "If set, then look for an order_set_id column to find the key order set that triggered the evaluation time point to determine number of recommendations to consider."
        )
        parser.add_option(
            "-s",
            "--sortField",
            dest="sortField",
            help=
            "Allow overriding of default sort field when returning ranked results"
        )
        parser.add_option(
            "-f",
            "--fieldFilters",
            dest="fieldFilters",
            help=
            "Filters to exclude results.  Comma-separated separated list of field-op:value exclusions where op is either < or > like, conditionalFreq<:0.1,frqeRatio<:1"
        )
        parser.add_option(
            "-t",
            "--timeDeltaMax",
            dest="timeDeltaMax",
            help=
            "If set, represents a time delta in seconds maximum by which recommendations should be based on.  Defaults to recommending items that occur at ANY time after the key orders.  If provided, will apply limits to only orders placed within 0 seconds, 1 hour (3600), 1 day (86400), or 1 week (604800) of the key orders / items."
        )
        parser.add_option(
            "-a",
            "--aggregationMethod",
            dest="aggregationMethod",
            help=
            "Aggregation method to use for recommendations based off multiple query items.  Options: %s."
            % list(AGGREGATOR_OPTIONS))
        parser.add_option(
            "-p",
            "--countPrefix",
            dest="countPrefix",
            help=
            "Prefix for how to do counts.  Blank for default item counting allowing repeats, otherwise ignore repeats for patient_ or encounter_"
        )
        parser.add_option(
            "-m",
            "--maxRecommendedId",
            dest="maxRecommendedId",
            help=
            "Specify a maximum ID value to accept for recommended items.  More used to limit output in test cases"
        )

        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()
        if len(args) >= 1:
            # Parse out the query parameters
            query = AnalysisQuery()
            query.recommender = RECOMMENDER_CLASS_BY_NAME[
                options.recommender]()
            query.recommender.dataManager.dataCache = dict()
            # Use a dataCache to facilitate repeat queries

            if options.preparedPatientItemFile:
                # Don't reconstruct validation data through the database; just read it off the validation file
                query.preparedPatientItemFile = stdOpen(args[0])
            else:
                patientIdsParam = args[0]
                try:
                    # Try to open patient IDs as a file
                    patientIdFile = stdOpen(patientIdsParam)
                    query.patientIds = set(patientIdFile.read().split())
                except IOError:
                    # Unable to open as a filename, so interpret as a simple comma-separated list
                    query.patientIds = set(patientIdsParam.split(","))

                if options.numQuery is not None:
                    query.numQueryItems = int(options.numQuery)
                    query.numVerifyItems = int(options.numVerify)
                else:
                    # Alternative to specify query time span starting from a key category
                    query.queryTimeSpan = timedelta(0,
                                                    int(options.queryTimeSpan))
                    query.verifyTimeSpan = timedelta(
                        0, int(options.verifyTimeSpan))

                if options.baseCategoryId is not None or options.baseItemId is not None:
                    if options.baseCategoryId is not None:
                        query.baseCategoryId = int(options.baseCategoryId)
                        # Category to look for clinical item to start accruing query items from
                    if options.baseItemId is not None:
                        query.baseItemId = int(options.baseItemId)

                if options.startDate is not None:
                    query.startDate = DBUtil.parseDateValue(options.startDate)
                if options.endDate is not None:
                    query.endDate = DBUtil.parseDateValue(options.endDate)

            query.baseRecQuery = RecommenderQuery()
            query.baseRecQuery.excludeCategoryIds = query.recommender.defaultExcludedClinicalItemCategoryIds()
            query.baseRecQuery.excludeItemIds = query.recommender.defaultExcludedClinicalItemIds()
            if options.timeDeltaMax is not None and len(
                    options.timeDeltaMax) > 0:
                query.baseRecQuery.timeDeltaMax = timedelta(
                    0, int(options.timeDeltaMax))
            if options.aggregationMethod is not None:
                query.baseRecQuery.aggregationMethod = options.aggregationMethod
            if options.countPrefix is not None:
                query.baseRecQuery.countPrefix = options.countPrefix
            if options.maxRecommendedId is not None:
                query.baseRecQuery.maxRecommendedId = int(
                    options.maxRecommendedId)
            if options.sortField is not None:
                query.baseRecQuery.sortField = options.sortField
            if options.fieldFilters is not None:
                for fieldFilterStr in options.fieldFilters.split(","):
                    (fieldOp, valueStr) = fieldFilterStr.split(":")
                    query.baseRecQuery.fieldFilters[fieldOp] = float(valueStr)

            if options.numRecs is not None:
                query.numRecommendations = int(options.numRecs)
            else:
                # No recommendation count specified, so just use the same as the verify number
                query.numRecommendations = query.numVerifyItems
            query.numRecsByOrderSet = options.numRecsByOrderSet

            # Run the actual analysis
            analysisResults = self(query)

            # Format the results for output
            outputFilename = None
            if len(args) > 1:
                outputFilename = args[1]
            outputFile = stdOpen(outputFilename, "w")

            # Print comment line with analysis arguments to allow for deconstruction later
            summaryData = {
                "argv": argv
            }
            print(COMMENT_TAG, json.dumps(summaryData), file=outputFile)

            formatter = TextResultsFormatter(outputFile)
            colNames = self.resultHeaders(query)
            formatter.formatTuple(colNames)
            # Insert a mock record to get a header / label row
            formatter.formatResultDicts(analysisResults, colNames)

        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        log.info("%.3f seconds to complete", timer)
Example #8
    def test_dataConversion(self):
        # Run the data conversion on the same data and look for expected records
        log.debug("Run the conversion process...")
        convOptions = ConversionOptions()
        convOptions.startDate = TEST_START_DATE
        self.converter.convertSourceItems(convOptions)

        # Just query back for the same data, de-normalizing the data back to a general table
        testQuery = \
            """
            select 
                pi.external_id,
                pi.patient_id,
                pi.encounter_id,
                cic.description,
                ci.name,
                ci.description,
                pi.item_date
            from
                patient_item as pi,
                clinical_item as ci,
                clinical_item_category as cic
            where
                pi.clinical_item_id = ci.clinical_item_id and
                ci.clinical_item_category_id = cic.clinical_item_category_id and
                cic.source_table = 'stride_culture_micro'
            order by
                pi.external_id desc
            """
        expectedData = \
            [   ##### CHANGE to the actual expected data
            [-10, 1, 2, "Microculture Susceptibility General", "Clindamycin:Intermediate", "Intermediate TO Clindamycin", DBUtil.parseDateValue("9/10/2111 13:15"),],
            [-11, 2, 3, "Microculture Susceptibility General", "Vancomycin:Susceptible", "Susceptible TO Vancomycin", DBUtil.parseDateValue("4/26/2109 9:49"),],
            [-12, 3, 4, "Microculture Susceptibility General", "Oxacillin:Resistant", "Resistant TO Oxacillin", DBUtil.parseDateValue("4/18/2109 4:48"),],
            [-13, 4, 5, "Microculture Susceptibility General", "Vancomycin:Susceptible", "Susceptible TO Vancomycin", DBUtil.parseDateValue("3/28/2109 23:21"),],
            [-14, 5, 6, "Microculture Susceptibility General", "Amoxicillin-Clavulanic Acid:Susceptible", "Susceptible TO Amoxicillin-Clavulanic Acid", DBUtil.parseDateValue("6/3/2109 17:07")],
            [-15, 6, 7, "Microculture Susceptibility General", "Negative Culture", "Microculture Grew No Bacteria", DBUtil.parseDateValue("6/4/2109 17:07")]
        ]
        actualData = DBUtil.execute(testQuery)
        self.assertEqualTable(expectedData, actualData)
Example #9
            SOLP (should ignore hypotonic IVF for now)\t4/6/2009 14:00\t\
            4/7/2009 14:00\tCONTINUOUS\t\t75\n\
        -123090\t-123\t8982\tALBUMIN, HUMAN 5 % 5 % IV SOLP (should ignore \
            albumin for now)\t4/6/2009 14:00\t4/7/2009 14:00\tONCE\t500\t\n\
        -123100\t-123\t27838\tSODIUM CHLORIDE 0.9 % 0.9 % IV SOLP\t\
            4/6/2009 16:30\t4/6/2009 18:00\tCONTINUOUS\t\t500\n\
        -123110\t-123\t4318\tLACTATED RINGERS IV SOLP\t4/6/2009 17:00\t\
            4/6/2009 18:00\tCONTINUOUS\t\t1000\n"
}

# Dictionary mapping from test function to expected output.
FM_TEST_OUTPUT = {
    "test_processPatientEpisodeInput" : [
        { "pat_id": -789, "proc_code": "LABMETB", "normal_results": "0",
            "order_proc_id": "-900",
            "order_time": DBUtil.parseDateValue("5/6/2009 15:00") },
        { "pat_id": -789, "proc_code": "LABMETB", "normal_results": "0",
            "order_proc_id": "-800",
            "order_time": DBUtil.parseDateValue("4/6/2009 16:00") },
        { "pat_id": -789, "proc_code": "LABMETB", "normal_results": "1",
            "order_proc_id": "-750",
            "order_time": DBUtil.parseDateValue("4/26/2009 6:00") },
        { "pat_id": -789, "proc_code": "LABMETB", "normal_results": "1",
            "order_proc_id": "-700",
            "order_time": DBUtil.parseDateValue("4/25/2009 6:00") },
        { "pat_id": -456, "proc_code": "LABMETB", "normal_results": "1",
            "order_proc_id": "-600",
            "order_time": DBUtil.parseDateValue("5/6/2009 15:00") },
        { "pat_id": -456, "proc_code": "LABMETB", "normal_results": "1",
            "order_proc_id": "-400",
            "order_time": DBUtil.parseDateValue("4/25/2009 6:00") },
Example #10
    def test_buildFeatureMatrix_multiFlowsheet(self):
        """
        Test buildFeatureMatrix and addFlowsheet.
        """
        # Verify FeatureMatrixFactory throws Error if patientEpisodeInput
        # has not been set.
        with self.assertRaises(ValueError):
            self.factory.processPatientEpisodeInput()

        # Initialize DB cursor.
        cursor = self.connection.cursor()

        # Build SQL query for list of patient episodes.
        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS bigint)")
        patientEpisodeQuery.addSelect("sop.order_proc_id AS order_proc_id")
        patientEpisodeQuery.addSelect("proc_code")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addSelect(
            "COUNT(CASE result_in_range_yn WHEN 'Y' THEN 1 ELSE null END) AS normal_results"
        )
        patientEpisodeQuery.addFrom("stride_order_proc AS sop")
        patientEpisodeQuery.addFrom("stride_order_results AS sor")
        patientEpisodeQuery.addWhere("sop.order_proc_id = sor.order_proc_id")
        patientEpisodeQuery.addWhereEqual("proc_code", "LABMETB")
        patientEpisodeQuery.addGroupBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        patientEpisodeQuery.addOrderBy(
            "pat_id, sop.order_proc_id, proc_code, order_time")
        cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        # Set and process patientEpisodeInput.
        self.factory.setPatientEpisodeInput(cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(
                episode["order_time"])
            resultPatientEpisodes.append(episode)

        # Verify results (note sort order).
        expectedPatientEpisodes = FM_TEST_OUTPUT[
            "test_processPatientEpisodeInput"]
        self.assertEqualList(resultPatientEpisodes, expectedPatientEpisodes)

        # Add flowsheet features.
        flowsheetNames = ["Resp", "FiO2", "Glasgow Coma Scale Score"]
        # Look for flowsheet data 90 days before each episode, but never after.
        preTimeDelta = datetime.timedelta(-90)
        postTimeDelta = datetime.timedelta(0)
        self.factory.addFlowsheetFeatures(flowsheetNames, preTimeDelta,
                                          postTimeDelta)
        self.factory.buildFeatureMatrix()
        resultMatrix = self.factory.readFeatureMatrixFile()

        # Verify results.
        expectedMatrix = FM_TEST_OUTPUT[
            "test_buildFeatureMatrix_multiFlowsheet"]["expectedMatrix"]
        self.assertEqualTable(expectedMatrix, resultMatrix[2:], precision=5)

        try:
            os.remove(self.factory.getMatrixFileName())
        except OSError:
            pass
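
The flowsheet window arithmetic in isolation, as a minimal sketch (only the timedelta values come from the test above; the comparison rule is an assumption about FeatureMatrixFactory):

import datetime

episodeTime = datetime.datetime(2009, 4, 6, 16, 0)
preTimeDelta = datetime.timedelta(-90)   # negative delta: 90 days before the episode
postTimeDelta = datetime.timedelta(0)    # zero delta: nothing after the episode
windowStart = episodeTime + preTimeDelta
windowEnd = episodeTime + postTimeDelta
# Presumably only flowsheet values with windowStart <= t <= windowEnd
# are aggregated into the episode's feature columns.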
Example #11
            4/6/2009 18:00\tCONTINUOUS\t\t1000\n"
}

# Dictionary mapping from test function to expected output.
FM_TEST_OUTPUT = {
    "test_processPatientEpisodeInput": [{
        "pat_id":
        -789,
        "proc_code":
        "LABMETB",
        "normal_results":
        "0",
        "order_proc_id":
        "-900",
        "order_time":
        DBUtil.parseDateValue("5/6/2009 15:00")
    }, {
        "pat_id":
        -789,
        "proc_code":
        "LABMETB",
        "normal_results":
        "0",
        "order_proc_id":
        "-800",
        "order_time":
        DBUtil.parseDateValue("4/6/2009 16:00")
    }, {
        "pat_id":
        -789,
        "proc_code":