Пример #1
0
    def generateFigure(self, calibrationResults, figureFilename=None):
        """Quickly generate an example calibration figure with pylab (matplotlib).

        Plots one scatter point per entry of calibrationResults (predicted
        rate on X, observed rate on Y), sized in proportion to the entry's
        "totalInstances" value, along with a dashed diagonal reference line.
        The legend reports the P-value returned by self.calculateHosmerLemeshow.

        calibrationResults: Models providing "predictedRate", "observedRate"
            and "totalInstances" columns.
        figureFilename: Where to save the figure. If None, the figure is
            displayed directly (blocking) instead of being saved.
        """
        import pylab  # Only import dependency as needed
        predictedRates = columnFromModelList(calibrationResults, "predictedRate")
        observedRates = columnFromModelList(calibrationResults, "observedRate")
        instanceCounts = columnFromModelList(calibrationResults, "totalInstances")

        # Scale markers so the smallest non-empty group gets size 20.
        # Groups with a zero count are ignored when picking the scale,
        # otherwise a single empty group would cause a division by zero.
        positiveCounts = [count for count in instanceCounts if count > 0]
        markerScalar = 20.0 / min(positiveCounts) if positiveCounts else 0.0
        markerWidths = [instanceCount * markerScalar for instanceCount in instanceCounts]

        (hlStat, degFreedom, hlP) = self.calculateHosmerLemeshow(calibrationResults)

        # Leave 5% headroom beyond the largest value on each axis
        xMax = max(predictedRates) * 1.05
        yMax = max(observedRates) * 1.05
        xyMax = max(xMax, yMax)

        pylab.clf()
        pylab.scatter(predictedRates, observedRates, markerWidths, label='P-Hosmer-Lemeshow %0.5f' % hlP)
        pylab.plot([0, xyMax], [0, xyMax], 'k--')  # Diagonal line for optimally calibrated result reference
        pylab.xlim([0.0, xMax])
        pylab.ylim([0.0, yMax])
        pylab.xlabel('Predicted Rate')
        pylab.ylabel('Observed Rate')
        pylab.title('Calibration Curve (%s)' % figureFilename)
        pylab.legend(loc="upper left")
        if figureFilename is None:
            # No file specified, just try to do a direct display
            pylab.show(block=True)
        else:
            pylab.savefig(figureFilename)
Пример #2
0
    def __call__(self, inputFile1, inputFile2, options):
        """Compare the ranked item lists found in two score files.

        Each file is parsed into score models, sorted by its own score column
        and sort direction, and reduced to its ordered list of item IDs.

        inputFile1, inputFile2: Inputs passed to self.parseScoreModelsFromFile.
        options: Object providing scoreCol1/2, descSort1/2 and idCol1/2.

        Returns a dictionary holding the "RBO" similarity from self.calcRBO,
        plus whatever self.populateQueryCounts adds to it.
        """
        # Local import keeps this method self-contained
        from functools import cmp_to_key

        # Parse out the files into score models for each row
        scoreModels1 = self.parseScoreModelsFromFile(
            inputFile1, scoreCols=[options.scoreCol1])
        scoreModels2 = self.parseScoreModelsFromFile(
            inputFile2, scoreCols=[options.scoreCol2])

        # Sort the results by the specified score column and sort order.
        # The comparator must go through key=cmp_to_key(...): Python 3's
        # list.sort no longer accepts a positional comparison function.
        # NOTE(review): assumes RowItemFieldComparator instances are
        #   two-argument comparison callables - confirm against its definition.
        scoreModels1.sort(key=cmp_to_key(
            RowItemFieldComparator(options.scoreCol1, options.descSort1)))
        scoreModels2.sort(key=cmp_to_key(
            RowItemFieldComparator(options.scoreCol2, options.descSort2)))

        # Pull out the sorted list of key items for each
        itemList1 = columnFromModelList(scoreModels1, options.idCol1)
        itemList2 = columnFromModelList(scoreModels2, options.idCol2)

        # Calculate available ranked list similarity measures
        resultDict = dict()
        resultDict["RBO"] = self.calcRBO(itemList1, itemList2)
        self.populateQueryCounts(scoreModels1, scoreModels2, resultDict)

        return resultDict
Пример #3
0
    def loadPatientInfo(self, patientIds=None, relativeTime=None, conn=None):
        """Fetch basic patient records along with the simulation state each
        patient occupies at the given relative time (time zero by default).

        patientIds: Optional collection of patient IDs to restrict the query to.
        relativeTime: Time offset whose active state should be reported.
        conn: Optional open connection to reuse. When omitted, one is obtained
            from self.connFactory and closed before returning.

        Returns the list of patient models. Each is annotated with a
        "postStateIdByItemId" lookup and, where a time-triggered transition
        exists for the patient's state, a "postStateIdTimeTrigger" pair of
        (postStateId, timeTrigger). Time-triggered transitions that are
        already due are recorded and the affected patient is reloaded.
        """
        # Fall back to time zero when no offset was requested
        relativeTime = 0 if relativeTime is None else relativeTime

        # Only a connection opened here gets closed here
        ownsConn = conn is None
        if ownsConn:
            conn = self.connFactory.connection()
        try:
            query = SQLQuery()
            for selectColumn in (
                    "sp.sim_patient_id",
                    "sp.name",
                    "sp.age_years",
                    "sp.gender",
                    "s.sim_state_id",
                    "s.name as state_name",
                    "s.description as state_description",
                    "sps.relative_time_start",
                    "sps.relative_time_end",
            ):
                query.addSelect(selectColumn)
            for fromTable in (
                    "sim_patient as sp",
                    "sim_patient_state as sps",
                    "sim_state as s",
            ):
                query.addFrom(fromTable)
            query.addWhere("sp.sim_patient_id = sps.sim_patient_id")
            query.addWhere("sps.sim_state_id = s.sim_state_id")
            if patientIds is not None:
                query.addWhereIn("sp.sim_patient_id", patientIds)

            # The active state started at or before the requested time and
            # either has no end or ends after the requested time
            query.addWhereOp("sps.relative_time_start", "<=", relativeTime)
            query.openWhereOrClause()
            query.addWhere("sps.relative_time_end is null")
            query.addWhereOp("sps.relative_time_end", ">", relativeTime)
            query.closeWhereOrClause()

            query.addOrderBy("sp.name")

            dataModels = modelListFromTable(
                DBUtil.execute(query, includeColumnNames=True, conn=conn))

            if len(dataModels) == 0:
                # Nothing to annotate
                return dataModels

            # Secondary query: possible transitions out of the patients' current states
            transitionQuery = SQLQuery()
            for selectColumn in ("pre_state_id", "post_state_id",
                                 "clinical_item_id", "time_trigger"):
                transitionQuery.addSelect(selectColumn)
            transitionQuery.addFrom("sim_state_transition as sst")
            transitionQuery.addWhereIn(
                "pre_state_id",
                columnFromModelList(dataModels, "sim_state_id"))
            transitionRows = DBUtil.execute(transitionQuery, conn=conn)

            # Per pre-state: which clinical item, or elapsed time, leads to which post-state
            itemTriggersByPreState = dict()
            timeTriggerByPreState = dict()
            for preStateId, postStateId, itemId, timeTrigger in transitionRows:
                itemTriggersByPreState.setdefault(preStateId,
                                                  dict())[itemId] = postStateId
                if timeTrigger is not None:
                    timeTriggerByPreState[preStateId] = (postStateId,
                                                         timeTrigger)

            # Attach the lookups to each patient model for retrieval
            for index, patientModel in enumerate(dataModels):
                patientId = patientModel["sim_patient_id"]
                stateId = patientModel["sim_state_id"]

                patientModel["postStateIdByItemId"] = itemTriggersByPreState.get(
                    stateId, dict())
                # NOTE(review): this key differs from the "postStateIdTimeTrigger"
                #   key read below; it looks unintentional but is kept as-is - confirm
                patientModel["postStateIdTimeTriggerByPreStateId"] = dict()
                if stateId in timeTriggerByPreState:
                    patientModel["postStateIdTimeTrigger"] = timeTriggerByPreState[
                        stateId]

                if (patientModel["relative_time_end"] is not None
                        or "postStateIdTimeTrigger" not in patientModel):
                    continue

                # Open-ended state with a time trigger: check whether the trigger time has passed
                (postStateId,
                 timeTrigger) = patientModel["postStateIdTimeTrigger"]
                triggerTime = patientModel["relative_time_start"] + timeTrigger

                if triggerTime <= relativeTime:
                    # Enough time has elapsed, so apply the state transition
                    self.recordStateTransition(patientId,
                                               stateId,
                                               postStateId,
                                               triggerTime,
                                               conn=conn)
                    # The new state may carry its own triggers, so reload recursively.
                    # Small risk of infinite recursion if time triggers are zero;
                    # otherwise each pass moves the state start time forward.
                    dataModels[index] = self.loadPatientInfo([patientId],
                                                             relativeTime,
                                                             conn=conn)[0]

            return dataModels
        finally:
            if ownsConn:
                conn.close()
Пример #4
0
def main(argv):
    """Run the full extraction, writing one output file per data element.

    Queries the patient episode cohort first, then uses the resulting set of
    patient IDs to extract flowsheets, lab results, IV fluids, clinical items,
    Charlson comorbidity groups and treatment team groups into files in the
    current directory. The elapsed time is reported on stderr.

    argv: Command-line arguments (not used by this function).
    """
    startTime = time.time()

    extractor = DataExtractor()

    patientEpisodes = queryPatientEpisodes(stdOpen("patientEpisodes.tab", "w"),
                                           extractor)
    # Maybe just do this first time, then comment out and load from file with line below
    #patientEpisodes = extractor.parsePatientEpisodeFile(stdOpen("patientEpisodes.tab"), list()); # Read from prior file if main query already done to avoid expensive query
    patientIds = set(columnFromModelList(patientEpisodes, "patient_id"))

    extractor.queryFlowsheet(FLOWSHEET_NAMES, patientIds,
                             stdOpen("Flowsheet.tab.gz", "w"))
    extractor.queryLabResults(LAB_BASE_NAMES, patientIds,
                              stdOpen("LabResults.tab.gz", "w"))

    # Look for specific IV fluid medication subset
    ivfMedIds = set(row["medication_id"]
                    for row in extractor.loadMapData("Medication.IVFluids")
                    if row["group"] == "isotonic")
    extractor.queryIVFluids(ivfMedIds, patientIds,
                            stdOpen("IsotonicIVFluids.tab.gz", "w"))

    extractor.queryClinicalItems(loadIVAntibioticItemIds(extractor),
                                 patientIds, stdOpen("IVAntibiotic.tab", "w"))
    extractor.queryClinicalItems(loadBloodCultureItemIds(extractor),
                                 patientIds, stdOpen("BloodCulture.tab", "w"))
    extractor.queryClinicalItems(loadRespiratoryViralPanelItemIds(extractor),
                                 patientIds, stdOpen("RespViralPanel.tab",
                                                     "w"))

    # Items looked up by exact name; each goes to a file named after the item
    for itemName in ("AnyICULifeSupport", "AnyDNR", "AnyVasoactive", "AnyCRRT",
                     "AnyVentilator"):
        extractor.queryClinicalItemsByName((itemName, ), patientIds,
                                           stdOpen(itemName + ".tab", "w"))

    # Items matched by a pattern against the description column
    for (descriptionPattern, filename) in (
        ("^Comfort Care", "ComfortCare.tab"),
        ('consult.*palliative', "PalliativeConsult.tab"),
    ):
        extractor.queryClinicalItemsByName((descriptionPattern, ),
                                           patientIds,
                                           stdOpen(filename, "w"),
                                           col="description",
                                           operator="~*")

    # Demographic items, again one file per item name
    for itemName in (
            "Death",
            "Birth",
            "Male",
            "Female",
            "RaceWhiteNonHispanicLatino",
            "RaceAsian",
            "RaceWhiteHispanicLatino",
            "RaceHispanicLatino",
            "RaceUnknown",
            "RaceOther",
            "RaceBlack",
            "RacePacificIslander",
            "RaceNativeAmerican",
    ):
        extractor.queryClinicalItemsByName((itemName, ), patientIds,
                                           stdOpen(itemName + ".tab", "w"))

    # Extract out lists of ICD9 prefixes per disease category
    icd9prefixesByDisease = dict()
    for row in extractor.loadMapData("CharlsonComorbidity-ICD9CM"):
        (disease, icd9prefix) = (row["charlson"], row["icd9cm"])
        icd9prefixesByDisease.setdefault(disease,
                                         list()).append("^ICD9." + icd9prefix)
    for disease, icd9prefixes in icd9prefixesByDisease.items():
        # Strip spaces and punctuation so the name is usable in a filename.
        # The two-argument str.translate(None, chars) form only exists in
        # Python 2; this filter behaves the same on Python 2 and 3.
        diseaseLabel = "".join(char for char in disease
                               if char not in " ()-/")
        extractor.queryClinicalItemsByName(icd9prefixes,
                                           patientIds,
                                           stdOpen(
                                               "Charlson." + diseaseLabel +
                                               ".tab", "w"),
                                           operator="~*")

    # Extract out lists of treatment team names per care category
    teamNameByCategory = dict()
    for row in extractor.loadMapData("TreatmentTeamGroups"):
        (category, teamName) = (row["team_category"], row["treatment_team"])
        teamNameByCategory.setdefault(category, list()).append(teamName)
    for category, teamNames in teamNameByCategory.items():
        extractor.queryClinicalItemsByName(teamNames,
                                           patientIds,
                                           stdOpen("TT." + category + ".tab",
                                                   "w"),
                                           col="description")

    elapsedSeconds = time.time() - startTime
    print("%.3f seconds to complete" % elapsedSeconds, file=sys.stderr)
Пример #5
0
def queryPatientEpisodes(outputFile, extractor):
    """Query the patient episode cohort and write it out as tab-delimited text.

    Runs in two phases on a single connection: a cohort query that yields one
    episode per ADT row (patient, encounter, admit time, discharge time), then
    an encounter query that fills in payor titles and vital signs for each
    encounter found.

    outputFile: Open file the episodes are written to, with a header row.
    extractor: Not used by this implementation.

    Returns the list of episode models. "payorTitle" stays None or becomes a
    set of titles; the vital sign fields hold the values of the last encounter
    row processed for that encounter.
    """
    log.info(
        "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)"
    )

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # NOTE: an earlier variant of this cohort selection (patients with
        #   suspected-sepsis orders, excluding those on primary surgery teams)
        #   was disabled here in favor of the treatment team filter below.

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = \
        """
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            adt1.pat_anon_id in
            (select patient_id from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = '%s') 
        and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
        """ % ("Tt Pamf Med (Primary)")

        print(cohortQuery, file=sys.stderr)
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Build one basic episode per result row: patient ID,
        #   ED presentation time and discharge time
        prog = ProgressDots()
        while True:
            cohortRow = cursor.fetchone()
            if cohortRow is None:
                break
            (patientId, encounterId, edAdmitTime, dischargeTime) = cohortRow
            patientEpisode = RowItemModel({
                "patient_id": patientId,
                "edAdmitTime": edAdmitTime,
                "dischargeTime": dischargeTime,
                "encounter_id": encounterId,
                # Encounter data defaults to null in case it can't be found later
                "payorTitle": None,
                "bpSystolic": None,
                "bpDiastolic": None,
                "temperature": None,
                "pulse": None,
                "respirations": None,
            })
            patientEpisodes.append(patientEpisode)
            # Only the first episode seen for an encounter is indexed
            patientEpisodeById.setdefault(patientEpisode["encounter_id"],
                                          patientEpisode)

            prog.update()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        for selectColumn in ("pat_id", "pat_enc_csn_id", "title",
                             "bp_systolic", "bp_diastolic", "temperature",
                             "pulse", "respirations"):
            query.addSelect(selectColumn)
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        while True:
            encounterRow = cursor.fetchone()
            if encounterRow is None:
                break
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = encounterRow
            if encounterId not in patientEpisodeById:
                continue
            patientEpisode = patientEpisodeById[encounterId]
            if patientEpisode["payorTitle"] is None:
                # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"] = set()
            patientEpisode["payorTitle"].add(payorTitle)
            patientEpisode["bpSystolic"] = bpSystolic
            patientEpisode["bpDiastolic"] = bpDiastolic
            patientEpisode["temperature"] = temperature
            patientEpisode["pulse"] = pulse
            patientEpisode["respirations"] = respirations

        # Drop results as tab-delimited text output
        TextResultsFormatter(outputFile).formatResultDicts(patientEpisodes,
                                                           addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
Пример #6
0
def queryPatientEpisodes(outputFile, extractor):
    """Query emergency department admissions of patients with suspected sepsis
    and write the resulting episodes out as tab-delimited text.

    The cohort is patients with any IV antibiotic, blood culture or
    respiratory viral panel item, excluding any patient with an item for a
    "SurgerySpecialty" treatment team. A second query fills in payor titles
    and vital signs per encounter.

    outputFile: Open file the episodes are written to, with a header row.
    extractor: Used to load the item ID sets and the treatment team mapping.

    Returns the list of episode models. "payorTitle" stays None or becomes a
    set of titles; the vital sign fields hold the values of the last encounter
    row processed for that encounter.
    """
    log.info("Select patient admissions with possible/probable sepsis within 24 hours of admission (long query >60 min?)...")

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # Clinical item category for admission diagnoses
        # ADMIT_DX_CATEGORY_ID = 2;
        # NOTE(review): the value is not used further down in this function;
        #   kept because the lookup fails fast when the category is missing - confirm
        admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0]

        # Look for items indicating suspected infection / sepsis
        ivAntibioticItemIds = loadIVAntibioticItemIds(extractor)
        bloodCultureItemIds = loadBloodCultureItemIds(extractor)
        respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor)

        # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds))
        # Comma-separated ID list for the SQL "in (...)" clause. An empty set
        # would render as "in ()", which is not valid SQL, so fall back to
        # "in (null)", which is valid and matches no rows.
        suspectSepsisItemIdsStr = ",".join(str(itemId) for itemId in suspectSepsisItemIds) or "null"

        # Look for primary surgery teams to exclude
        excludeTeamCategory = "SurgerySpecialty"
        excludeTreatmentTeams = [
            row["treatment_team"]
            for row in extractor.loadMapData("TreatmentTeamGroups")
            if row["team_category"] == excludeTeamCategory
        ]
        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addFrom("clinical_item")
        query.addWhereIn("description", excludeTreatmentTeams)
        excludeTeamItemIds = set(row[0] for row in DBUtil.execute(query, conn=conn))
        # Same empty-set guard as above: "in (null)" then excludes nobody
        excludeTeamItemIdsStr = ",".join(str(itemId) for itemId in excludeTeamItemIds) or "null"

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = \
        """
        --  Pick out date(s) when admitted through emergency department and matching discharge time
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            -- Admission event
            adt1.department_in = 'EMERGENCY DEPARTMENT' and
            adt1.event_in = 'Admission' and
            adt1.pat_anon_id in
            (    -- Select any patient with any suspected sepsis related order (i.e., IV antibiotics or blood cultures)
                select patient_id
                from patient_item as pi
                where pi.clinical_item_id in (%s)
                except
                -- Exclude any patient who has been on a primary surgery team
                select patient_id
                from patient_item
                where clinical_item_id in (%s)
                -- -12434586418575,-12432455207729,-12428492282572,-12428492282572,-12424048595257,-12414081679705
            ) and
            
            adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id and
            
            -- Discharge event
            adt2.event_out = 'Discharge'
            
        order by adt1.shifted_transf_in_dt_tm
        """ % (suspectSepsisItemIdsStr, excludeTeamItemIdsStr)
        # Echo the query to stderr; write() works on both Python 2 and 3,
        # unlike the Python 2-only "print >> stream" statement
        sys.stderr.write(cohortQuery + "\n")
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Build one basic episode per result row: patient ID,
        #   ED presentation time and discharge time
        prog = ProgressDots()
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            patientEpisode = \
                RowItemModel \
                (   {   "patient_id":patientId,
                        "edAdmitTime":edAdmitTime,
                        "dischargeTime":dischargeTime,
                        "encounter_id":encounterId,
                        "payorTitle": None, # Default encounter data to null in case can't find it later
                        "bpSystolic": None,
                        "bpDiastolic": None,
                        "temperature": None,
                        "pulse": None,
                        "respirations": None,
                    }
                )
            patientEpisodes.append(patientEpisode)
            # Only the first episode seen for an encounter is indexed
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode

            prog.update()
            row = cursor.fetchone()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        query.addSelect("pat_id")
        query.addSelect("pat_enc_csn_id")
        query.addSelect("title")
        query.addSelect("bp_systolic")
        query.addSelect("bp_diastolic")
        query.addSelect("temperature")
        query.addSelect("pulse")
        query.addSelect("respirations")
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations) = row
            if encounterId in patientEpisodeById:
                patientEpisode = patientEpisodeById[encounterId]
                if patientEpisode["payorTitle"] is None:
                    patientEpisode["payorTitle"] = set()   # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"].add(payorTitle)
                patientEpisode["bpSystolic"] = bpSystolic
                patientEpisode["bpDiastolic"] = bpDiastolic
                patientEpisode["temperature"] = temperature
                patientEpisode["pulse"] = pulse
                patientEpisode["respirations"] = respirations
            row = cursor.fetchone()

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
Пример #7
0
    def generateFigure(self, resultDicts, summaryData, options=None):
        """Quickly generate an example visualization figure with pylab (matplotlib).

        Plots one line per metric column named in options.axes against the
        "ItemsConsidered" column of resultDicts, optionally annotating selected
        data points, then saves or displays the figure.

        resultDicts: Models with an "ItemsConsidered" column plus one column
            per plotted axis key.
        summaryData: Not used by this method.
        options: Object providing figure, title, rcParams, labelIndexes, axes
            and cycleLineStyle. Despite the None default it is required,
            since its attributes are read unconditionally.
        """
        # Title defaults to the figure file's base name without extension.
        # With no figure file (direct display) there is no name to derive
        # one from, so start from an empty title.
        figureTitle = ""
        if options.figure is not None:
            figureTitle = os.path.splitext(os.path.basename(options.figure))[0]
        if options.title is not None:
            figureTitle = options.title

        # Custom parameter not part of matplotlib rc defaults as far as I can tell
        annotationSizeKey = "annotation.size"
        rcParams = {annotationSizeKey: 14}
        if options.rcParams is not None:
            rcParams.update(json.loads(options.rcParams))
            # Only hand matplotlib the settings meant for it; the custom
            # annotation size is read from the local dictionary below and
            # matplotlib may reject rc keys it does not know.
            pylab.rcParams.update(
                dict((key, value) for (key, value) in rcParams.items()
                     if key != annotationSizeKey))

        pylab.clf()
        maxItems = len(resultDicts)
        itemsConsideredAxis = columnFromModelList(resultDicts,
                                                  "ItemsConsidered")

        labelIndexes = set()
        if options.labelIndexes is not None:
            labelIndexes.update(
                int(indexStr)
                for indexStr in options.labelIndexes.split(VALUE_DELIM))

        # Data labels
        #   http://stackoverflow.com/questions/22272081/label-python-data-points-on-plot
        axes = pylab.figure().add_subplot(111)

        # Option for differing line styles
        lastMetricScoreCol = (None, None)
        linestyles = ["-", "--", "-.", ":"]
        iLineStyle = 0
        linestyle = linestyles[iLineStyle]

        # Flag whether data appears to be increasing or decreasing to help guide subsequent legend placement
        isDataIncreasing = None

        # Each axis key other than ItemsConsidered names a metric and score column to plot
        resultKeys = options.axes.split(VALUE_DELIM)
        for iResultKey, resultKey in enumerate(resultKeys):
            if resultKey == "ItemsConsidered":
                continue

            (metric, scoreCol) = resultKey.split(AXIS_DELIM)
            if options.cycleLineStyle and metric != lastMetricScoreCol[0]:
                # New metric: reset the color cycle and move to the next line style.
                # set_color_cycle was deprecated in favor of set_prop_cycle and
                # is absent from newer matplotlib releases, so prefer the
                # newer method when the axes object offers it.
                currentAxes = pylab.gca()
                if hasattr(currentAxes, "set_prop_cycle"):
                    currentAxes.set_prop_cycle(None)
                else:
                    currentAxes.set_color_cycle(None)
                linestyle = linestyles[iLineStyle]
                iLineStyle = (iLineStyle + 1) % len(linestyles)

            metricAxis = columnFromModelList(resultDicts, resultKey)
            label = '%s' % (resultKey)
            line = pylab.plot(itemsConsideredAxis,
                              metricAxis,
                              label=label,
                              linestyle=linestyle,
                              linewidth=2)[0]

            # Offset labels to avoid overlap of near values
            offset = (iResultKey % 4) * 2

            # Add item labels as specified
            for i in labelIndexes:
                # Data point marker
                pylab.scatter(itemsConsideredAxis[i],
                              metricAxis[i],
                              color=line.get_color())
                # Percentage annotation next to the marker
                axes.annotate('{:.0%}'.format(metricAxis[i]),
                              xy=(itemsConsideredAxis[i] + offset,
                                  metricAxis[i]),
                              xytext=(5, 0),
                              textcoords="offset points",
                              color=line.get_color(),
                              size=rcParams[annotationSizeKey])

            # Guide legend placement based on where data lines expected
            # (the last plotted metric decides)
            isDataIncreasing = (metricAxis[-1] > metricAxis[0])

            lastMetricScoreCol = (metric, scoreCol)

        pylab.xlim([1, maxItems])
        pylab.ylim([0.0, 1.05])
        pylab.grid(True)
        pylab.xlabel('Top K Items Considered')
        pylab.ylabel('Accuracy')
        pylab.title(figureTitle)

        # Keep the legend away from the end where the lines climb highest
        legendLoc = "upper right"
        if isDataIncreasing:
            legendLoc = "upper left"
        pylab.legend(loc=legendLoc, title="Metric:Method")

        if options.figure is None:
            # No file specified, just try to do a direct display
            pylab.show(block=True)
        else:
            pylab.savefig(options.figure)
Пример #8
0
def queryPatients(outputFile):
    """Collect patients with an 'AnyICULifeSupport' order and write them out as tab-delimited text.

    Phase 1 walks each patient's per-day item counts starting from their first
    life support item, and extends "lastContiguousDate" for as long as successive
    days fall within CONTIGUOUS_THRESHOLD of the previous contiguous day.
    Phase 2 looks up the recorded encounter for each patient in
    stride_patient_encounter to fill in payor titles and vital signs.

    Args:
        outputFile: Destination handed to TextResultsFormatter for the
            formatted result rows (header row included).

    Returns:
        dict mapping integer patient ID to a RowItemModel with keys
        patient_id, firstLifeSupportDate, lastContiguousDate, encounter_id,
        payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations.
        payorTitle is a comma-separated string of the sorted distinct payor
        titles, or None when no (non-null) payor was found.
    """
    log.info("Select patients with any ICU life support orders and follow contiguous date trail for apparent hospitalization (long query >20 min)...")

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        anyLifeSupportItemId = DBUtil.execute("select clinical_item_id from clinical_item where name = 'AnyICULifeSupport'", conn=conn)[0][0]

        patientById = dict()
        # NOTE: the interpolated value is an ID read back from our own database, not user input
        query = \
            """select pi.patient_id, date_trunc('day',pi.item_date), min(pi.encounter_id), count(pi.patient_item_id)
            from patient_item as pi,
            (
                select pi2.patient_id, min(pi2.item_date) as firstLifeSupportDate
                    from patient_item as pi2
                    where pi2.clinical_item_id = %s
                    group by pi2.patient_id
            ) as piX
            where pi.patient_id = piX.patient_id
            and pi.item_date >= piX.firstLifeSupportDate
            group by pi.patient_id, date_trunc('day',pi.item_date)
            order by pi.patient_id, date_trunc('day',pi.item_date)
            """ % anyLifeSupportItemId
        cursor.execute(query)

        # Rows arrive ordered by patient then day, so each patient's days are visited chronologically
        for (patientId, itemDate, encounterId, _itemCount) in iter(cursor.fetchone, None):
            patientId = int(patientId)
            if patientId not in patientById:
                patientById[patientId] = \
                    RowItemModel \
                    (   {   "patient_id": patientId,
                            "firstLifeSupportDate": itemDate,
                            "lastContiguousDate": itemDate,
                            "encounter_id": encounterId,    # Assumes single value that won't be overwritten
                            "payorTitle": None,     # Default encounter data to null in case can't find it later
                            "bpSystolic": None,
                            "bpDiastolic": None,
                            "temperature": None,
                            "pulse": None,
                            "respirations": None,
                        }
                    )
            patient = patientById[patientId]
            # Only extend the contiguous trail when this day is close enough to the last contiguous day
            if (itemDate - patient["lastContiguousDate"]) <= CONTIGUOUS_THRESHOLD:
                patient["lastContiguousDate"] = itemDate
            # First day may lack an encounter ID; adopt the first non-null one seen later
            if patient["encounter_id"] is None:
                patient["encounter_id"] = encounterId

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(iter(patientById.values()), "encounter_id")
        query = SQLQuery()
        query.addSelect("pat_id")
        query.addSelect("pat_enc_csn_id")
        query.addSelect("title")
        query.addSelect("bp_systolic")
        query.addSelect("bp_diastolic")
        query.addSelect("temperature")
        query.addSelect("pulse")
        query.addSelect("respirations")
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        for row in iter(cursor.fetchone, None):
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations) = row
            patient = patientById[patientId]
            if payorTitle is not None:
                # Single encounters may have multiple payors to track.
                # Null titles are skipped: they cannot be sorted or joined with strings.
                if patient["payorTitle"] is None:
                    patient["payorTitle"] = set()
                patient["payorTitle"].add(payorTitle)
            patient["bpSystolic"] = bpSystolic
            patient["bpDiastolic"] = bpDiastolic
            patient["temperature"] = temperature
            pati["pulse"] = pulse if False else pulse
            patient["respirations"] = respirations

        # Condense each patient's payor set to a single string.
        # Must cover every patient, not just the one from the last fetched row.
        for patient in patientById.values():
            payorTitles = patient["payorTitle"]
            if payorTitles is not None:
                patient["payorTitle"] = ",".join(sorted(payorTitles))

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(iter(patientById.values()), addHeaderRow=True)

        return patientById
    finally:
        cursor.close()
        conn.close()