Example #1
def parseLabResultsFile(labFile):
    log.info("Parse lab results file");
    prog = ProgressDots();
    labsByBaseNameByPatientId = dict(); # Dictionary of dictionaries of lists of result items
    for labResult in TabDictReader(labFile):
        if labResult["ord_num_value"] is not None and labResult["ord_num_value"] != NULL_STRING:
            patientId = int(labResult["pat_id"]);
            labBaseName = labResult["base_name"];
            resultValue = float(labResult["ord_num_value"]);
            resultTime = DBUtil.parseDateValue(labResult["result_time"]);

            if resultValue < LAB_SENTINEL_VALUE:    # Skip apparent placeholder values
                labResult["pat_id"] = labResult["patient_id"] = patientId;
                labResult["ord_num_value"] = resultValue;
                labResult["result_time"] = resultTime;

                if patientId not in labsByBaseNameByPatientId:
                    labsByBaseNameByPatientId[patientId] = dict();
                if labBaseName not in labsByBaseNameByPatientId[patientId]:
                    labsByBaseNameByPatientId[patientId][labBaseName] = list();
                labsByBaseNameByPatientId[patientId][labBaseName].append( labResult );

        prog.update();
    prog.printStatus();
    return labsByBaseNameByPatientId;
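A minimal usage sketch for the parser above (the file path is hypothetical; assumes the module-level imports and constants such as TabDictReader, NULL_STRING, and LAB_SENTINEL_VALUE are in scope):

# Hypothetical invocation: parse a tab-delimited lab results file with the columns referenced above
labFile = open("labResults.tab")  # hypothetical path
labsByBaseNameByPatientId = parseLabResultsFile(labFile)
labFile.close()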
Example #2
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect("count(order_med_id_coded) as nOrders")
        query.addSelect("om.med_route, om.medication_id, om.med_description")
        query.addFrom("starr_datalake2018.order_med as om")
        if options.descriptionPrefix:
            query.addWhereOp("om.med_description", "like",
                             options.descriptionPrefix + "%%")
            # Add wildcard to enable prefix search
        if options.medRoutes:
            query.addWhereIn("om.med_route", options.medRoutes.split(","))
        query.addGroupBy("om.medication_id, om.med_description, om.med_route")
        query.addOrderBy("nOrders desc, om.med_description")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
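For orientation, the query assembled above presumably renders to SQL along these lines (an approximation; the exact text depends on SQLQuery's formatting, and the literal values stand in for user-supplied options):

select count(order_med_id_coded) as nOrders, om.med_route, om.medication_id, om.med_description
from starr_datalake2018.order_med as om
where om.med_description like 'Acetaminophen%'      -- only if options.descriptionPrefix given
  and om.med_route in ('Oral', 'Intravenous')       -- only if options.medRoutes given
group by om.medication_id, om.med_description, om.med_route
order by nOrders desc, om.med_description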
Example #3
    def queryItems(self, options, outputFile):
        """Query for all clinical item records that fulfill the options criteria
        and then send the results as tab-delimited output to the outputFile.
        """
        pauseSeconds = float(options.pauseSeconds)

        query = SQLQuery()
        query.addSelect(
            "cic.description, ci.clinical_item_id, ci.name, ci.description")
        query.addFrom("clinical_item_category as cic")
        query.addFrom("clinical_item as ci")
        query.addWhere(
            "cic.clinical_item_category_id = ci.clinical_item_category_id")
        if options.itemPrefix:
            query.addWhereOp("ci.description", "like",
                             options.itemPrefix + "%%")
            # Add wildcard to enable prefix search
        if options.categoryNames:
            query.addWhereIn("cic.description",
                             options.categoryNames.split(","))
        query.addOrderBy(
            "cic.description, ci.name, ci.description, ci.clinical_item_id")

        formatter = TextResultsFormatter(outputFile)

        prog = ProgressDots()
        for row in DBUtil.execute(query,
                                  includeColumnNames=True,
                                  connFactory=self.connFactory):
            formatter.formatTuple(row)
            time.sleep(pauseSeconds)
            prog.update()
        prog.printStatus()
Example #4
def main(nResamples = 1000, sampleSize = 200, positiveCount = 180):
	confidenceInterval = 0.95

	sample = list()
	for i in range(sampleSize):
		value = 0;
		if i < positiveCount:
			value = 1;
		sample.append(value);
	sample = np.array(sample);

	print("Sample Rate: ",calcStat(sample) );

	rng = np.random.RandomState(nResamples);	# Seed with number for consistency
	bootstrapValues = list();

	prog = ProgressDots();
	for i in range(nResamples):
		indices = rng.randint(0, sampleSize, sampleSize);	# Resample indices with replacement (randint's high bound is exclusive; replaces deprecated random_integers)
		resample = sample[indices];
		bootstrapValues.append( calcStat(resample) );
		prog.update();

	ciFractionLow = (1.0-confidenceInterval) / 2;
	ciFractionHigh= confidenceInterval + ciFractionLow;

	bootstrapValues.sort();
	print("Sample Rate 95%CI Low", bootstrapValues[int(ciFractionLow*nResamples)] )
	print("Sample Rate 95%CI High", bootstrapValues[int(ciFractionHigh*nResamples)] )
Example #5
def addLabFeatures(labsByBaseNameByPatientId, patientById, colNames, indexItemBaseName, labBaseNames, labPreTime, labPostTime):
    log.info("Sort lab results by result time for each patient and find items within specified time period to aggregate");
    prog = ProgressDots();
    for iPatient, (patientId, labsByBaseName) in enumerate(labsByBaseNameByPatientId.iteritems()):
        # Look for the first result of the index item (ferritin)
        indexItem = None;
        if indexItemBaseName in labsByBaseName:
            for labResult in labsByBaseName[indexItemBaseName]:
                if indexItem is None or labResult["result_time"] < indexItem["result_time"]:
                    indexItem = labResult;

        if indexItem is not None:   # Skip patients with no index item; should not happen since the cohort was pre-screened for relevant patients
            indexTime = indexItem["result_time"];

            patient = patientById[patientId];
            patient["index_time"] = indexTime;

            preTimeLimit = indexTime+labPreTime;
            postTimeLimit = indexTime+labPostTime;

            # Init values for each lab of interest to an empty list
            for labBaseName in labBaseNames:
                # Default to null for all values
                patient["%s.min" % labBaseName] = None;
                patient["%s.max" % labBaseName] = None;
                patient["%s.median" % labBaseName] = None;
                patient["%s.mean" % labBaseName] = None;
                patient["%s.std" % labBaseName] = None;
                patient["%s.first" % labBaseName] = None;
                patient["%s.last" % labBaseName] = None;
                patient["%s.proximate" % labBaseName] = None;
                
                proximateValue = None;
                if labBaseName in labsByBaseName:   # Not all patients will have all labs checked
                    proximateItem = None;   # Item closest to the index item in time
                    valueList = list();
                    for labResult in labsByBaseName[labBaseName]:
                        resultTime = labResult["result_time"];
                        if preTimeLimit <= resultTime and resultTime < postTimeLimit:
                            # Occurs within time frame of interest, so record this value
                            valueList.append(labResult["ord_num_value"]);
                            
                        if proximateItem is None or (abs(resultTime-indexTime) < abs(proximateItem["result_time"]-indexTime)):
                            proximateItem = labResult;
                    proximateValue = proximateItem["ord_num_value"];
                
                    if len(valueList) > 0:
                        patient["%s.min" % labBaseName] = np.min(valueList);
                        patient["%s.max" % labBaseName] = np.max(valueList);
                        patient["%s.median" % labBaseName] = np.median(valueList);
                        patient["%s.mean" % labBaseName] = np.mean(valueList);
                        patient["%s.std" % labBaseName] = np.std(valueList);
                        patient["%s.first" % labBaseName] = valueList[0];   # Assumes previously sorted 
                        patient["%s.last" % labBaseName] = valueList[-1];   #   by result_time
                    patient["%s.proximate" % labBaseName] = proximateValue;

                prog.update();
    colNames.extend(colsFromLabBaseNames(labBaseNames));
    prog.printStatus();
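colsFromLabBaseNames is not shown in this excerpt; a plausible sketch, assuming it mirrors the eight per-lab column suffixes populated above:

def colsFromLabBaseNames(labBaseNames):
    """Hypothetical helper: generate the '<base>.<suffix>' column names used above."""
    suffixes = ("min", "max", "median", "mean", "std", "first", "last", "proximate")
    return ["%s.%s" % (baseName, suffix) for baseName in labBaseNames for suffix in suffixes]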
Example #6
File: TopicModel.py  Project: xxxx3/CDSS
    def buildModel(self, corpusBOWGenerator, numTopics):
        """Build topic model from corpus (interpret as generator over contents)

        Given the bag-of-words corpus, build a docCountByWordId count dictionary
        to facilitate subsequent Term Frequency * Inverse Document Frequency (TF*IDF) calculations.
        In the clinical context, document = patient.

        Return (model, docCountByWordId);
        """
        # Load dictionary to translate item IDs to descriptions
        itemsById = DBUtil.loadTableAsDict("clinical_item")
        id2word = dict()
        # Models expect a pair for every possible item ID and judge vocabulary size by the
        # length of this dictionary rather than the maximum ID value, so the gaps
        # (IDs with no clinical_item record) must be populated as well.
        maxId = max(itemsById.keys())
        for itemId in range(maxId + 1):
            description = str(itemId)
            # Default to just the same as the ID string
            if itemId in itemsById:
                description = itemsById[itemId]["description"]
            id2word[itemId] = description

        # Stream in progressive updates from the corpus generator so the whole corpus need not be loaded into memory.
        # Batch many documents per update; incrementing one at a time is very slow.
        docBuffer = list()

        prog = ProgressDots()
        self.model = None
        self.docCountByWordId = {
            None: 0
        }
        # Use None key to represent count of all documents
        for i, document in enumerate(corpusBOWGenerator):
            for (
                    wordId, wordCount
            ) in document:  # Assuming uniqueness of wordId keys for each document
                if wordId not in self.docCountByWordId:
                    self.docCountByWordId[wordId] = 0
                self.docCountByWordId[wordId] += 1
            self.docCountByWordId[None] += 1

            docBuffer.append(document)
            if i % BUFFER_UPDATE_SIZE == (
                    BUFFER_UPDATE_SIZE -
                    1):  # Update model with current buffer of documents
                self.model = self.updateModel(self.model, docBuffer, id2word,
                                              numTopics)
                docBuffer = list()
                # Discard committed buffer
            prog.update()

        self.model = self.updateModel(self.model, docBuffer, id2word,
                                      numTopics)
        # Last update for any remaining documents in buffer
        docBuffer = list()
        # Discard committed buffer

        # prog.printStatus();
        return (self.model, self.docCountByWordId)
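The docstring says docCountByWordId is meant to support TF*IDF weighting; a sketch of how an inverse document frequency could be derived from it (an assumption, using the standard log(N/df) form and the None-key convention above for the total document count):

import math

def inverseDocFreq(wordId, docCountByWordId):
    """Hypothetical IDF helper: log(total documents / documents containing the word)."""
    nDocs = docCountByWordId[None]  # None key counts all documents
    docFreq = docCountByWordId.get(wordId, 0)
    return math.log(float(nDocs) / docFreq) if docFreq > 0 else 0.0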
Example #7
    def main(self, argv):
        """Main method, callable from command line"""
        usageStr = "usage: %prog -p <pid> -c <commandStr>\n"
        parser = OptionParser(usage=usageStr)
        parser.add_option(
            "-p",
            "--pid",
            dest="pid",
            help=
            "Process ID to monitor.  As soon as it is no longer found by Unix signal (os.kill(pid,0)), will proceed to execute the given command"
        )
        parser.add_option(
            "-i",
            "--interval",
            dest="interval",
            default="1",
            help=
            "How many seconds to wait before checking if the PID is still active.  Default to 1 second."
        )
        parser.add_option(
            "-c",
            "--commandStr",
            dest="commandStr",
            help=
            "Command string to execute after the specified PID is longer found.  Will just pass whole string to subprocess"
        )
        (options, args) = parser.parse_args(argv[1:])

        log.info("Starting: " + str.join(" ", argv))
        timer = time.time()

        ###########---From medinfo/common/support/awaitProcess.py---#############
        if options.pid and options.commandStr:
            pid = int(options.pid)
            interval = float(options.interval)

            prog = ProgressDots(60, 1, "intervals")
            while pidExists(pid):
                time.sleep(interval)
                prog.update()
            # prog.printStatus();

            print >> sys.stderr, "Executing: ", options.commandStr
            process = subprocess.Popen(options.commandStr)
            print >> sys.stderr, "Started process: ", process.pid
        else:
            parser.print_help()
            sys.exit(-1)

        timer = time.time() - timer
        print >> sys.stderr, ("%.3f seconds to complete" % timer)
        ###---END awaitProcess method---###

        log.info("%.3f seconds to complete", timer)
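pidExists is used but not defined in this excerpt; a minimal sketch based on the os.kill(pid, 0) probe that the option help text describes:

import os

def pidExists(pid):
    """Return True if a process with the given PID is still alive (Unix signal 0 probe)."""
    try:
        os.kill(pid, 0)  # Signal 0 checks existence/permission without delivering a signal
        return True
    except OSError:
        return False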
Example #8
def parseClinicalItemFile(itemFile, patientIdCol="patient_id", timeCol="item_date"):
    prog = ProgressDots();
    itemTimesByPatientId = dict();
    for itemData in TabDictReader(itemFile):
        patientId = int(itemData[patientIdCol]);
        itemTime = DBUtil.parseDateValue(itemData[timeCol]);

        itemData[patientIdCol] = patientId;
        itemData[timeCol] = itemTime;

        if patientId not in itemTimesByPatientId:
            itemTimesByPatientId[patientId] = list();
        itemTimesByPatientId[patientId].append( itemTime );

        prog.update();
    prog.printStatus();

    return itemTimesByPatientId;
Example #9
def main(argv):
    conversionProcessor = STRIDEOrderResultsConversion();
    
    conn = DBUtil.connection();
    try:
        # Pull out list of result names to look for that are not already in the calculated stats table
        nameTable = DBUtil.execute("select name from sim_result except select base_name from order_result_stat", conn=conn);
        prog = ProgressDots(big=1,small=1,total=len(nameTable));
        for row in nameTable:
            baseName = row[0];
            print("Calculating Stats for %s" % baseName, file=sys.stderr);
            statModel = conversionProcessor.calculateResultStats( baseName, conn=conn );
            DBUtil.insertRow("order_result_stat", statModel, conn=conn );
            prog.update();
        prog.printStatus();
        conn.commit();
    finally:
        conn.close();
Example #10
    def __call__(self, inputFiles):
        """Return generator over dictionary objects representing
        the concatenated contents of the input files after adding and accounting for argv parameter columns.
        """
        # Consolidate a master set of all column headers to use
        self.colNames = list()  # Keeps track of the consistent order found
        colSet = set()  # Keeps track of unique items

        # Pull out any header comments that may represent an argv list to parse
        # Pull out header row with column labels for each input file
        argvDicts = list()  # One dictionary per input file, keyed by argv parameter name with respective value
        readers = list()  # One TabDictReader per input file, from which header columns can be accessed as fieldnames
        for inputFile in inputFiles:
            reader = TabDictReader(inputFile)
            readers.append(reader)
            for col in reader.fieldnames:
                if col not in colSet:
                    colSet.add(col)
                    self.colNames.append(col)

            argvDict = self.extract_argvDict(reader.commentLines)
            # Must be called after reader.fieldnames so initial text parsing will start
            argvDicts.append(argvDict)
            for col in argvDict.iterkeys():
                if col not in colSet:
                    colSet.add(col)
                    self.colNames.append(col)

        prog = ProgressDots(50, 1, "Files")

        # Now generate each file in succession, but "outer-joined" to include the master column header list
        for argvDict, reader in zip(argvDicts, readers):
            for resultDict in reader:
                resultDict.update(argvDict)
                for col in self.colNames:
                    if col not in resultDict:
                        resultDict[col] = None
                yield resultDict
            prog.update()
Example #11
def main_quickTest(argv):
    modelFilename = argv[1]
    modeler = TopicModel()

    timer = time.time()
    (model, docCountByWordId) = modeler.loadModelAndDocCounts(modelFilename)
    timer = time.time() - timer
    log.info("%.2f seconds to load", timer)

    timer = time.time()
    weightByItemIdByTopicId = modeler.generateWeightByItemIdByTopicId(
        model, 100)
    timer = time.time() - timer
    log.info("%.2f seconds to generate weights", timer)

    for i in xrange(3):
        prog = ProgressDots()
        for (topicId, weightByItemId) in weightByItemIdByTopicId.iteritems():
            for (itemId, itemWeight) in weightByItemId.iteritems():
                prog.update()
        prog.printStatus()
    """
Example #12
def main(argv):
    medIdsByActiveRx = dict()
    medIdsByActiveRx['Buprenorphine'] = ('125498', '114474', '212560',
                                         '114475', '114467', '114468')
    medIdsByActiveRx['Fentanyl Patch'] = ('2680', '27908', '125379', '27905',
                                          '27906', '540107', '540638',
                                          '540101', '27907')
    medIdsByActiveRx['Methadone'] = ('540483', '4953', '4951', '10546',
                                     '214468', '15996', '41938', '4954',
                                     '4952')
    medIdsByActiveRx['Hydrocodone'] = ('3724', '4579', '8576', '8577', '8951',
                                       '10204', '12543', '13040', '14963',
                                       '14965', '14966', '17061', '17927',
                                       '19895', '20031', '28384', '29486',
                                       '29487', '34505', '34544', '35613',
                                       '117862', '204249', '206739')
    medIdsByActiveRx['Hydromorphone'] = ('2458', '2459', '2464', '2465',
                                         '3757', '3758', '3759', '3760',
                                         '3761', '10224', '10225', '10226',
                                         '10227', '200439', '201094', '201096',
                                         '201098', '540125', '540179',
                                         '540666')
    medIdsByActiveRx['Morphine'] = (
        '5167', '5168', '5172', '5173', '5176', '5177', '5178', '5179', '5180',
        '5183', '6977', '10655', '15852', '20908', '20909', '20910', '20914',
        '20915', '20919', '20920', '20921', '20922', '29464', '30138', '31413',
        '36140', '36141', '79691', '87820', '89282', '91497', '95244', '96810',
        '112562', '112564', '115335', '115336', '126132', '198543', '198544',
        '198623', '201842', '201848', '205011', '206731', '207949', '208896',
        '540182', '540300')
    medIdsByActiveRx['Oxycodone'] = ('5940', '5941', '6122', '6981', '10812',
                                     '10813', '10814', '14919', '16121',
                                     '16123', '16129', '16130', '19187',
                                     '26637', '26638', '27920', '27921',
                                     '27922', '27923', '28897', '28899',
                                     '28900', '31851', '31852', '31863',
                                     '31864', '92248', '126939', '200451',
                                     '203690', '203691', '203692', '203705',
                                     '203706', '203707', '204020', '204021')

    query = baseQuery()
    totalPatients = float(DBUtil.execute(query)[0][0])

    # print"Total Patients\t%s" % totalPatients

    # print"======= Dx Groups ===========";
    # print"Dx Group\tPt Count\tDx Rate";
    patientsPerDxGroup = dict()
    query = SQLQuery()
    query.addSelect("count(distinct prob.pat_id) as ptCount")
    query.addSelect("prob.%s" % DX_COL)
    query.addFrom("stride_problem_list as prob")
    query.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
    query.addGroupBy("prob.%s" % DX_COL)
    if DOUBLE_DX:
        query.addSelect("prob2.%s" % DX_COL)
        query.addFrom("stride_problem_list as prob2")
        query.addWhere("prob.pat_id = prob2.pat_id")
        query.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
        query.addGroupBy("prob2.%s" % DX_COL)
    results = DBUtil.execute(query)
    for row in results:
        patientCount = row[0]
        dxGroup = row[1]
        if DOUBLE_DX:
            dxGroup = (dxGroup, row[2])
            # Composite tuple including second diagnosis
        patientsPerDxGroup[dxGroup] = patientCount

    progress = ProgressDots()
    for activeRx, medIds in medIdsByActiveRx.iteritems():
        query = baseQuery()
        query.addWhereIn("medication_id", medIds)

        # Baseline prescription rates
        rxPtCount = DBUtil.execute(query)[0][0]

        # print"====== Rx Counts ======";
        # print"Rx\tPt Count\tRx Rate";
        # print"%s\t%s\t%s" % (activeRx, rxPtCount, (rxPtCount/totalPatients));

        # print"======== Rx-Dx Association ========";
        statIds = (
            "P-Fisher",
            "P-YatesChi2",
            "oddsRatio",
            "relativeRisk",
            "interest",
            "LR+",
            "LR-",
            "sensitivity",
            "specificity",
            "PPV",
            "NPV",
        )
        if progress.getCounts() == 0:
            headerCols = [
                "Rx", "Dx", "RxDxCount", "RxCount", "DxCount", "Total"
            ]
            if DOUBLE_DX:
                headerCols.insert(2, "Dx2")
            headerCols.extend(statIds)
            headerStr = str.join("\t", headerCols)
            print headerStr

        # Query out per diagnosis group, but do as aggregate grouped query
        query.addSelect("prob.%s" % DX_COL)
        query.addFrom("stride_problem_list as prob")
        query.addWhere("med.pat_id = prob.pat_id")
        query.addWhereOp("prob.noted_date", "<", LIMIT_DATE)
        #query.addWhereIn("prob.%s" % DX_COL, dxKeys );
        query.addGroupBy("prob.%s" % DX_COL)
        if DOUBLE_DX:
            query.addSelect("prob2.%s" % DX_COL)
            query.addFrom("stride_problem_list as prob2")
            query.addWhere("prob.pat_id = prob2.pat_id")
            query.addWhereOp("prob2.noted_date", "<", LIMIT_DATE)
            query.addGroupBy("prob2.%s" % DX_COL)
        results = DBUtil.execute(query)
        for row in results:
            rxDxPtCount = row[0]
            dxGroup = row[1]
            if DOUBLE_DX:
                dxGroup = (dxGroup, row[2])
                # Composite tuple including second diagnosis
            dxPtCount = patientsPerDxGroup[dxGroup]

            conStats = ContingencyStats(rxDxPtCount, rxPtCount, dxPtCount,
                                        totalPatients)

            dataCells = [
                activeRx, dxGroup, rxDxPtCount, rxPtCount, dxPtCount,
                totalPatients
            ]
            if DOUBLE_DX:
                dataCells[1] = dxGroup[0]
                dataCells.insert(2, dxGroup[1])
            for statId in statIds:
                try:
                    dataCells.append(conStats[statId])
                except ZeroDivisionError:
                    dataCells.append(None)
            for i, value in enumerate(dataCells):
                dataCells[i] = str(value)
                # String conversion to allow for concatenation below
            dataStr = str.join("\t", dataCells)
            print dataStr
            progress.update()
    progress.printStatus()
Example #13
def queryPatientEpisodes(outputFile, extractor):
    log.info("Select patient admissions with possible/probable sepsis within 24 hours of admission (long query >60 min?)...");

    conn = DBUtil.connection();
    cursor = conn.cursor();
    try:
        # Clinical item category for admission diagnoses
        # ADMIT_DX_CATEGORY_ID = 2;
        admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0];

        # Look for items indicating suspected infection / sepsis
        ivAntibioticItemIds = loadIVAntibioticItemIds(extractor);
        bloodCultureItemIds = loadBloodCultureItemIds(extractor);
        respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor);

        # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds));
        suspectSepsisItemIdsStr = str.join(',', [str(itemId) for itemId in suspectSepsisItemIds]);   # Convert to comma-separated string via str.join on a list comprehension

        # Look for primary surgery teams to exclude
        excludeTeamCategory = "SurgerySpecialty";
        excludeTreatmentTeams = list();
        for row in extractor.loadMapData("TreatmentTeamGroups"):
            if row["team_category"] == excludeTeamCategory:
                excludeTreatmentTeams.append(row["treatment_team"]);
        query = SQLQuery();
        query.addSelect("clinical_item_id");
        query.addFrom("clinical_item");
        query.addWhereIn("description", excludeTreatmentTeams );
        excludeTeamItemIds = set();
        for row in DBUtil.execute(query, conn=conn):
            excludeTeamItemIds.add(row[0]);
        excludeTeamItemIdsStr = str.join(',', [str(itemId) for itemId in excludeTeamItemIds]);   # Convert to comma-separated string via str.join on a list comprehension

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = \
        """
        --  Pick out date(s) when admitted through emergency department and matching discharge time
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            -- Admission event
            adt1.department_in = 'EMERGENCY DEPARTMENT' and
            adt1.event_in = 'Admission' and
            adt1.pat_anon_id in
            (    -- Select any patient with any suspected sepsis related order (i.e., IV antibiotics or blood cultures)
                select patient_id
                from patient_item as pi
                where pi.clinical_item_id in (%s)
                except
                -- Exclude any patient who has been on a primary surgery team
                select patient_id
                from patient_item
                where clinical_item_id in (%s)
                -- -12434586418575,-12432455207729,-12428492282572,-12428492282572,-12424048595257,-12414081679705
            ) and
            
            adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id and
            
            -- Discharge event
            adt2.event_out = 'Discharge'
            
        order by adt1.shifted_transf_in_dt_tm
        """ % (suspectSepsisItemIdsStr, excludeTeamItemIdsStr);
        print >> sys.stderr, cohortQuery;
        cursor.execute(cohortQuery);

        patientEpisodes = list();
        patientEpisodeById = dict();

        # Collect basic patient ID and
        #   ED presentation dates and Discharge date/time
        prog = ProgressDots();
        row = cursor.fetchone();
        while row is not None:
            (patientId, encounterId, edAdmitTime, dischargeTime) = row;
            #patientId = int(patientId);
            patientEpisode = \
                RowItemModel \
                (   {   "patient_id":patientId, 
                        "edAdmitTime":edAdmitTime, 
                        "dischargeTime":dischargeTime, 
                        "encounter_id":encounterId,
                        "payorTitle": None, # Default encounter data to null in case can't find it later
                        "bpSystolic": None,
                        "bpDiastolic": None,
                        "temperature": None,
                        "pulse": None,
                        "respirations": None,
                    }
                );
            patientEpisodes.append(patientEpisode);
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[patientEpisode["encounter_id"]] = patientEpisode;

            prog.update();
            row = cursor.fetchone();
        prog.printStatus();

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id");
        query = SQLQuery();
        query.addSelect("pat_id");
        query.addSelect("pat_enc_csn_id");
        query.addSelect("title");
        query.addSelect("bp_systolic");
        query.addSelect("bp_diastolic");
        query.addSelect("temperature");
        query.addSelect("pulse");
        query.addSelect("respirations");
        query.addFrom("stride_patient_encounter");
        query.addWhereIn("pat_enc_csn_id", encounterIds);
        cursor.execute(str(query), query.params);
        row = cursor.fetchone();
        while row is not None:
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic, temperature, pulse, respirations) = row;
            if encounterId in patientEpisodeById:
                patientEpisode = patientEpisodeById[encounterId];
                if patientEpisode["payorTitle"] is None:
                    patientEpisode["payorTitle"] = set();   # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"].add(payorTitle);
                patientEpisode["bpSystolic"] = bpSystolic;
                patientEpisode["bpDiastolic"] = bpDiastolic;
                patientEpisode["temperature"] = temperature;
                patientEpisode["pulse"] = pulse;
                patientEpisode["respirations"] = respirations;
            row = cursor.fetchone();
        
        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile);
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True);

        return patientEpisodes;
    finally:
        cursor.close();
        conn.close();
Example #14
    def __call__(self,
                 inputFile,
                 labelCols,
                 valueCols,
                 matchCols,
                 baseLabels=None):
        prog = ProgressDots()

        self.labelCols = labelCols
        self.valueCols = valueCols
        self.matchCols = matchCols
        self.baseLabels = baseLabels

        labelModelByLabelKey = dict()
        dataByLabelKey = dict()

        reader = TabDictReader(inputFile)
        for rowModel in reader:
            labelKey = list()
            labelModel = dict()
            for labelCol in self.labelCols:
                labelModel[labelCol] = rowModel[labelCol]
                labelKey.append(rowModel[labelCol])
            labelKey = tuple(labelKey)
            # Change to immutable object that can be hashed

            # Copy just items of interest
            valueModel = {}
            if self.matchCols:
                for matchCol in self.matchCols:
                    valueModel[matchCol] = rowModel[matchCol]
            for valueCol in self.valueCols:
                try:
                    valueModel[valueCol] = float(rowModel[valueCol])
                except ValueError:  # Maybe None string, could not parse into a number
                    valueModel[valueCol] = None

            if labelKey not in dataByLabelKey:
                labelModelByLabelKey[labelKey] = labelModel
                dataByLabelKey[labelKey] = list()
            dataByLabelKey[labelKey].append(valueModel)

            prog.update()

        # prog.printStatus();

        # Another pass to ensure data is consistently sorted in each group to allow later paired t-tests
        if self.matchCols:
            for labelKey, data in dataByLabelKey.iteritems():
                data.sort(RowItemFieldComparator(self.matchCols))

        # See if looking for only one set of base labeled data to compare the rest against
        baseLabelKey = None
        if self.baseLabels is not None:
            baseLabelKey = tuple(self.baseLabels)

        # Result pass to compare all group pair-wise combinations
        prog = ProgressDots()
        for labelKey0, data0 in dataByLabelKey.iteritems():
            prefix0 = "Group0."
            labelModel0 = labelModelByLabelKey[labelKey0]

            if baseLabelKey is not None and labelKey0 != baseLabelKey:
                continue
                # Skip entries where the base label does not match specified key

            for labelKey1, data1 in dataByLabelKey.iteritems():
                prefix1 = "Group1."
                labelModel1 = labelModelByLabelKey[labelKey1]

                result = dict()
                for labelCol in self.labelCols:
                    result[prefix0 + labelCol] = labelModel0[labelCol]
                    result[prefix1 + labelCol] = labelModel1[labelCol]

                for valueCol in self.valueCols:
                    # Pull out value column for each data group.  Previously sorted by match col to allow paired t-testing
                    # Skip any value pairs if non-numeric / None value
                    values0 = list()
                    values1 = list()
                    for dataItem0, dataItem1 in zip(data0, data1):
                        if dataItem0[valueCol] is not None and dataItem1[
                                valueCol] is not None:
                            values0.append(dataItem0[valueCol])
                            values1.append(dataItem1[valueCol])

                    for summaryFunction in SUMMARY_FUNCTIONS:
                        result[prefix0 + valueCol + "." +
                               summaryFunction.__name__] = summaryFunction(
                                   values0)
                        result[prefix1 + valueCol + "." +
                               summaryFunction.__name__] = summaryFunction(
                                   values1)

                    for compTest in COMPARISON_TESTS:
                        (t, p) = compTest(values0, values1)
                        if np.isnan(p):
                            p = None
                            # Use more generic expression for NaN / null value
                        result[compTest.__name__ + "." + valueCol] = p

                yield result

                prog.update()
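SUMMARY_FUNCTIONS and COMPARISON_TESTS are module-level constants not shown in this excerpt; plausible definitions, assuming NumPy/SciPy and the paired t-testing the comments mention:

import numpy as np
from scipy import stats

SUMMARY_FUNCTIONS = [np.mean, np.median, np.std, len]  # Applied per value column; __name__ labels the output key
COMPARISON_TESTS = [stats.ttest_rel, stats.ttest_ind]  # Each returns a (statistic, p-value) pair as unpacked above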
Example #15
    for iDay in range(1, lengthOfStay):
        if (random.random() < dailyDNRRate):
            dnrDay = iDay
            dnrOccurs = True
            break
            # Don't need to keep looking
    rowData["AnyDNRatEnd"] = dnrOccurs + 0

    # Generate daily data
    for iDay in range(lengthOfStay):
        rowData["curr_day"] = rowData["start"] = iDay
        rowData["end"] = iDay + 1
        rowData["timeUntilNoMoreData"] = lengthOfStay - rowData["start"]
        rowData["timeUntilNoDataOrDNR"] = rowData["timeUntilNoMoreData"]
        if dnrOccurs:
            rowData["AnyDNR.pre"] = (iDay >= dnrDay) + 0
            rowData["AnyDNR.within1day"] = (iDay + 1 == dnrDay) + 0
            rowData["AnyDNR.postTimeDays"] = dnrDay - iDay
            if rowData["AnyDNR.postTimeDays"] < 0:
                rowData["AnyDNR.postTimeDays"] = "NA"
            else:
                rowData["timeUntilNoDataOrDNR"] = min(
                    rowData["timeUntilNoMoreData"],
                    rowData["AnyDNR.postTimeDays"])
        else:
            rowData["AnyDNR.pre"] = 0
            rowData["AnyDNR.within1day"] = 0
            rowData["AnyDNR.postTimeDays"] = "NA"
        formatter.formatResultDict(rowData, colNames)
    prog.update()
ofs.close()
Example #16
def queryPatientEpisodes(outputFile, extractor):
    log.info(
        "Select patient admissions with provider category of Tt Pamf Med (Primary) or Tt Med Univ (Primary)"
    )

    conn = DBUtil.connection()
    cursor = conn.cursor()
    try:
        # # Clinical item category for admission diagnoses
        # # ADMIT_DX_CATEGORY_ID = 2;
        # admitDxCategoryId = DBUtil.execute("select clinical_item_category_id from clinical_item_category where description like '%%ADMIT_DX%%'", conn=conn)[0][0];

        # # Look for items indicating suspected infection / sepsis
        # ivAntibioticItemIds = loadIVAntibioticItemIds(extractor);
        # bloodCultureItemIds = loadBloodCultureItemIds(extractor);
        # respiratoryViralPanelItemIds = loadRespiratoryViralPanelItemIds(extractor);

        # # Merge IV antibiotics and blood cultures, respiratory panels as items that suggest sepsis is suspected
        # suspectSepsisItemIds = ivAntibioticItemIds.union(bloodCultureItemIds.union(respiratoryViralPanelItemIds));
        # suspectSepsisItemIdsStr = str.join(',', [str(itemId) for itemId in suspectSepsisItemIds]);   # Convert to comma-separated string via str.join on a list comprehension

        # # Look for primary surgery teams to exclude
        # excludeTeamCategory = "SurgerySpecialty";
        # excludeTreatmentTeams = list();
        # for row in extractor.loadMapData("TreatmentTeamGroups"):
        #     if row["team_category"] == excludeTeamCategory:
        #         excludeTreatmentTeams.append(row["treatment_team"]);
        # query = SQLQuery();
        # query.addSelect("clinical_item_id");
        # query.addFrom("clinical_item");
        # query.addWhereIn("description", excludeTreatmentTeams );
        # excludeTeamItemIds = set();
        # for row in DBUtil.execute(query, conn=conn):
        #     excludeTeamItemIds.add(row[0]);
        # excludeTeamItemIdsStr = str.join(',', [str(itemId) for itemId in excludeTeamItemIds]);   # Convert to comma-separated string via str.join on a list comprehension

        # First pass query to get the list of patients and emergency department presentation times
        cohortQuery = \
        """
        select adt1.pat_anon_id, adt1.pat_enc_csn_anon_id, adt1.shifted_transf_in_dt_tm as edAdmitTime, adt2.shifted_transf_out_dt_tm as dischargeTime
        from stride_adt as adt1, stride_adt as adt2
        where 
            adt1.pat_anon_id in
            (select patient_id from patient_item inner join clinical_item on patient_item.clinical_item_id = clinical_item.clinical_item_id where clinical_item.clinical_item_category_id = 161 AND clinical_item.description = '%s') 
        and adt1.pat_enc_csn_anon_id = adt2.pat_enc_csn_anon_id
        """ % ("Tt Pamf Med (Primary)")

        print(cohortQuery, file=sys.stderr)
        cursor.execute(cohortQuery)

        patientEpisodes = list()
        patientEpisodeById = dict()

        # Collect basic patient ID and
        #   ED presentation dates and Discharge date/time
        prog = ProgressDots()
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, edAdmitTime, dischargeTime) = row
            #patientId = int(patientId);
            patientEpisode = \
                RowItemModel \
                (   {   "patient_id":patientId,
                        "edAdmitTime":edAdmitTime,
                        "dischargeTime":dischargeTime,
                        "encounter_id":encounterId,
                        "payorTitle": None, # Default encounter data to null in case can't find it later
                        "bpSystolic": None,
                        "bpDiastolic": None,
                        "temperature": None,
                        "pulse": None,
                        "respirations": None,
                    }
                )
            patientEpisodes.append(patientEpisode)
            if patientEpisode["encounter_id"] not in patientEpisodeById:
                patientEpisodeById[
                    patientEpisode["encounter_id"]] = patientEpisode

            prog.update()
            row = cursor.fetchone()
        prog.printStatus()

        # Second query phase to link to encounter information (e.g., insurance, admitting vital signs)
        encounterIds = columnFromModelList(patientEpisodes, "encounter_id")
        query = SQLQuery()
        query.addSelect("pat_id")
        query.addSelect("pat_enc_csn_id")
        query.addSelect("title")
        query.addSelect("bp_systolic")
        query.addSelect("bp_diastolic")
        query.addSelect("temperature")
        query.addSelect("pulse")
        query.addSelect("respirations")
        query.addFrom("stride_patient_encounter")
        query.addWhereIn("pat_enc_csn_id", encounterIds)
        cursor.execute(str(query), query.params)
        row = cursor.fetchone()
        while row is not None:
            (patientId, encounterId, payorTitle, bpSystolic, bpDiastolic,
             temperature, pulse, respirations) = row
            if encounterId in patientEpisodeById:
                patientEpisode = patientEpisodeById[encounterId]
                if patientEpisode["payorTitle"] is None:
                    patientEpisode["payorTitle"] = set()
                    # Single encounters may have multiple payors to track
                patientEpisode["payorTitle"].add(payorTitle)
                patientEpisode["bpSystolic"] = bpSystolic
                patientEpisode["bpDiastolic"] = bpDiastolic
                patientEpisode["temperature"] = temperature
                patientEpisode["pulse"] = pulse
                patientEpisode["respirations"] = respirations
            row = cursor.fetchone()

        # Drop results as tab-delimited text output
        formatter = TextResultsFormatter(outputFile)
        formatter.formatResultDicts(patientEpisodes, addHeaderRow=True)

        return patientEpisodes
    finally:
        cursor.close()
        conn.close()
Example #17
    def commitUpdateBuffer(self, updateBuffer, linkedItemIdsByBaseId, conn=None):
        """Take data accumulated in updateBuffer from prior update methods and
        commit them as incremental changes to the database.
        Clear buffer thereafter.
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();
        try:
            if "incrementDataByItemIdPair" in updateBuffer:
                # Ensure baseline records exist to facilitate subsequent incremental update queries
                itemIdPairs = list(updateBuffer["incrementDataByItemIdPair"].keys());
                self.prepareItemAssociations(itemIdPairs, linkedItemIdsByBaseId, conn);

                # Construct incremental update queries based on each item pair's incremental counts/sums
                nItemPairs = len(itemIdPairs);
                log.debug("Primary increment updates for %d item pairs" % nItemPairs );
                incrementProg = ProgressDots(name="Increments");
                incrementProg.total = nItemPairs;
                cursor = conn.cursor();
                try:
                    for (itemIdPair, incrementData) in updateBuffer["incrementDataByItemIdPair"].items():
                        query = ["UPDATE clinical_item_association SET"];
                        for col, increment in incrementData.items():
                            query.append("%(col)s=%(col)s+%(increment)s" % {"col":col,"increment":increment});
                            query.append(",");
                        query.pop();    # Drop extra comma at end of list
                        query.append("WHERE clinical_item_id=%(p)s AND subsequent_item_id=%(p)s" % {"p":DBUtil.SQL_PLACEHOLDER} );
                        query = str.join(" ", query);
                        itemIdPair = eval(itemIdPair)   # Key was stored as the string repr of the (clinical_item_id, subsequent_item_id) tuple; convert back for the query parameters
                        cursor.execute(query, itemIdPair);
                        incrementProg.update();
                    # incrementProg.printStatus();
                finally:
                    cursor.close();

            if "analyzedPatientItemIds" in updateBuffer:
                # Record analysis date for the given patient items
                patientItemIdSet = updateBuffer["analyzedPatientItemIds"];
                nItems = len(patientItemIdSet);
                log.debug("Record %d analyzed items" % nItems );
                if nItems > 0:
                    paramList = [datetime.now()];
                    updateSize = 0;
                    for itemId in patientItemIdSet:
                        paramList.append(itemId);
                        updateSize += 1;

                        if self.itemsPerUpdate is not None and updateSize > self.itemsPerUpdate:
                            # Update what we have so far, to avoid one excessively large query that may exceed the database timeout
                            DBUtil.execute \
                            (   """update patient_item
                                set analyze_date = %(p)s
                                where patient_item_id in (%(pList)s)
                                and analyze_date is null
                                """ % {"p": DBUtil.SQL_PLACEHOLDER, "pList":generatePlaceholders(updateSize)},
                                tuple(paramList),
                                conn=conn
                            );
                            # Reset item list parameters
                            paramList = [datetime.now()];
                            updateSize = 0;
                    # Final Update
                    DBUtil.execute \
                    (   """update patient_item
                        set analyze_date = %(p)s
                        where patient_item_id in (%(pList)s)
                        and analyze_date is null
                        """ % {"p": DBUtil.SQL_PLACEHOLDER, "pList":generatePlaceholders(updateSize)},
                        tuple(paramList),
                        conn=conn
                    );

            # Flag that any cached association metrics will be out of date
            self.dataManager.clearCacheData("analyzedPatientCount");
            self.dataManager.clearCacheData("clinicalItemCountsUpdated");

            # Database commit
            conn.commit();

            # Wipe out buffer to reflect incremental changes done, so any new ones should be recorded fresh
            updateBuffer.clear();
            updateBuffer["nAssociations"] = 0;
        finally:
            if not extConn:
                conn.close();
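For orientation, each increment statement assembled above presumably renders like the following (the count columns are illustrative assumptions; the two placeholders are filled from the itemIdPair tuple):

UPDATE clinical_item_association SET count_0=count_0+2 , count_any=count_any+5 WHERE clinical_item_id=%s AND subsequent_item_id=%s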
Example #18
# Select the second (index one) form (the first form is a search query box)
#br.select_form(nr=1)

# User credentials, single login and then store a cookie
#br.form['username'] = '******'
#br.form['password'] = '******'

# Login
#br.submit()

# Base URL to query for, with parameters for subsets
BASE_URL = 'http://www.samhsa.gov/medication-assisted-treatment/physician-program-data/treatment-physician-locator?field_bup_physician_us_state_value=All&page=%s'
BASE_OUTPUT_FILENAME = 'buprenorphinePhysicians.%s.htm';
N_PAGES = 641
#N_PAGES = 10
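The br object used below is never defined in this excerpt; given the commented-out select_form/form/submit calls above, it is presumably a mechanize Browser created during the login step, e.g.:

import mechanize
br = mechanize.Browser()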

progress = ProgressDots(big=100,small=2);
for iPage in xrange(N_PAGES):
    sourceURL = BASE_URL % (iPage);
    sourceFile = br.open(sourceURL);
    sourceContent = sourceFile.read();  # Just store whole file in memory for simplicity
    sourceFile.close();
    
    localFilename = BASE_OUTPUT_FILENAME % (iPage);
    localFile = open(localFilename,"w");
    localFile.write(sourceContent);
    localFile.close();
    
    progress.update();
progress.printStatus();
Example #19
    ];

for categoryName in CATEGORY_LIST:
    for itemPrefix in ITEM_PREFIXES:
        specificArgv = ["-c", categoryName, "-i", itemPrefix, "results/queryResults.%s.%s.tab.gz" % (itemPrefix, categoryName) ];
        specificArgvList.append(specificArgv);

prog = ProgressDots(1,1,"Processes",total=len(specificArgvList));
for specificArgv in specificArgvList:
    key = "%.3d.%s" % (prog.getCounts(), str.join("_",specificArgv) );
    key = key.replace("/","..");    # Don't want to use directory separator in temp log file name
    argv = list(baseArgv)
    argv.extend(specificArgv);
    log.info( "Starting: "+str.join(" ", argv) );
    logFile = stdOpen(LOG_FILE_TEMPLATE % key,"w")
    
    # Blocking sub-process call if want serial processes.
    #subprocess.call(argv, stderr=logFile);

    # Non-blocking subprocess.Popen to spawn parallel processes
    process = subprocess.Popen(argv, stderr=logFile);
    log.info("Process ID: %s" % process.pid);

    # Print command lines to effectively generate a .sh script
    #print "nohup",
    #print str.join(" ", argv),
    #print "&>", LOG_FILE_TEMPLATE % key,"&"
 
    prog.update();
prog.printStatus();
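The Popen handles spawned above are never joined, so the script exits while children may still be running. If the caller needs to block until all of them finish, a minimal addition (an assumption, not in the original) is to collect the handles and wait:

processes = list()
# ...inside the loop above, after each subprocess.Popen call:
#     processes.append(process)
# Once everything has been launched:
for process in processes:
    process.wait()  # Block until each spawned child exits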
Example #20
    def __call__(self, sourceFile, outputFile):
        headers = sourceFile.readline().split()
        for i, header in enumerate(headers):
            headers[i] = header.lower()

        prog = ProgressDots(50, 1)
        summaryRecords = list()

        headerData = {"script": HTML_SCRIPT, "style": HTML_STYLE}

        print(HTML_START % headerData, file=outputFile)
        nextRecord = None
        lastLine = None

        print('''<table class="dataTable" cellspacing=0 cellpadding=4>''',
              file=outputFile)
        iRecord = 0
        for line in sourceFile:
            if self.isNewRecordLine(line):
                # Blank line should signal end of a record
                if nextRecord is not None:
                    # Process the prior record
                    if iRecord % self.sampleInterval == 0:
                        self.processRecord(nextRecord, iRecord)
                        self.outputRecordDetail(nextRecord, outputFile)
                        summaryRecords.append(
                            self.extractSummaryRecord(nextRecord))
                        # Add just summary information to display later, without keeping the whole text file in memory. This can still take ~2GB RAM per 1000 records, so it may be better to route to a temporary file instead
                    iRecord += 1
                    prog.update()

                # Prepare a new record
                nextRecord = dict()
                chunks = line.split(self.delim)
                numChunks = len(chunks)
                for i, header in enumerate(headers):
                    if i < numChunks:
                        nextRecord[header] = chunks[i]
                    else:  # Not enough values, probably because the text continues on the next line; record a blank string
                        nextRecord[header] = ""
                #nextRecord["contact_date"] = DBUtil.parseDateValue(nextRecord["contact_date"]);
            else:  # Continuing lines of text; append to the document field (assumes documentHeader is already keyed in nextRecord)
                nextRecord[self.documentHeader] += line

        # Process the last record (guard against empty input, where no record was ever started)
        if nextRecord is not None and iRecord % self.sampleInterval == 0:
            self.processRecord(nextRecord, iRecord)
            self.outputRecordDetail(nextRecord, outputFile)
            summaryRecords.append(self.extractSummaryRecord(nextRecord))
            # Add just summary information to display later, without keeping whole text file in memory
        iRecord += 1
        prog.update()
        # prog.printStatus();

        print('''</table>''', file=outputFile)

        self.outputSummaryRecords(summaryRecords, outputFile)

        print(HTML_END, file=outputFile)

        #for header in headerSet:
        #    print >> sys.stdout, header;
        return summaryRecords
Example #21
"""Helped script for parallel process debugging. Just takes one parameter, sleeps for that many seconds, then completes"""

import sys
from medinfo.common.Util import ProgressDots
import time

duration = int(sys.argv[1])

progress = ProgressDots()
for i in range(duration):
    time.sleep(1)  # One second per iteration, so the run lasts ~duration seconds as the docstring describes
    progress.update()
progress.printStatus()
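An invocation sketch (the script name is hypothetical):

python sleepHelper.py 30

With the one-second sleep per iteration, this runs for roughly thirty seconds, printing progress dots along the way.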
Example #22
def main(argv):

    # Initial pass to get single diagnosis baselines by looking for 2x Dx combos where Dx1 = Dx2
    countByDx = dict()
    countByRxDx = dict()
    associationFile = open(argv[1])
    associationFile.readline()  # Discard header row
    for line in associationFile:
        line = line.strip()
        chunks = line.split("\t")
        rx = chunks[0]
        dx1 = chunks[1]
        dx2 = chunks[2]
        if dx1 == dx2:
            rxDxCount = int(chunks[3])
            rxCount = int(chunks[4])
            dxCount = int(chunks[5])
            countByRxDx[(rx, dx1)] = rxDxCount
            countByDx[dx1] = dxCount

    # Second pass to now do stats for combo diagnoses, to see if prescription shows difference between 1 or both diagnoses
    statIds = (
        "P-Fisher",
        "P-YatesChi2",
        "oddsRatio",
        "relativeRisk",
        "interest",
        "LR+",
        "LR-",
        "sensitivity",
        "specificity",
        "PPV",
        "NPV",
    )
    headerCols = [
        "Rx",
        "Dx1",
        "Dx2",
        "RxDx1Dx2Count",
        "RxDx1Count",
        "RxDx2Count",
        "RxCount",
        "Dx1Dx2Count",
        "Total",
        "E(RxDx1Dx2Count)",
        "E(RxDx2Dx1Count)",
        "E(Dx1Dx2Count)",
        "P-Chi2-Obs:Exp",
    ]
    headerCols.extend(statIds)
    headerStr = str.join("\t", headerCols)
    print(headerStr)

    associationFile = open(argv[1])
    associationFile.readline()  # Discard header row
    progress = ProgressDots()
    for line in associationFile:
        line = line.strip()
        chunks = line.split("\t")
        rx = chunks[0]
        dx1 = chunks[1]
        dx2 = chunks[2]
        rxDx1Dx2Count = int(chunks[3])
        rxDx1Count = countByRxDx[(rx, dx1)]
        rxDx2Count = countByRxDx[(rx, dx2)]
        rxCount = int(chunks[4])
        dx1Count = countByDx[dx1]
        dx2Count = countByDx[dx2]
        dx1dx2Count = int(chunks[5])
        totalCount = float(chunks[6])
        # Floating point to auto-convert float divisions later

        conStats = ContingencyStats(rxDx1Dx2Count, rxCount, dx1dx2Count,
                                    totalCount)

        # Expected vs. observed Rx rates given the Dx1+Dx2 combination, based on whether per-diagnosis Rx rates are independent of the combination
        observed = array \
            ([  (rxDx1Dx2Count),           (dx1dx2Count-rxDx1Dx2Count),
                (rxCount-rxDx1Dx2Count),   (totalCount-dx1dx2Count-rxCount+rxDx1Dx2Count),
            ])
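        # Explanatory note (not in the original): the flat observed array encodes a
        # 2x2 contingency table of Rx use vs. the Dx1+Dx2 combination, read row-wise:
        #   [Rx & Dx1Dx2,     noRx & Dx1Dx2]    = [rxDx1Dx2Count,           dx1dx2Count - rxDx1Dx2Count]
        #   [Rx & no Dx1Dx2,  noRx & no Dx1Dx2] = [rxCount - rxDx1Dx2Count, totalCount - dx1dx2Count - rxCount + rxDx1Dx2Count]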

        # Expected rates based on assumption that diagnoses occur independently of one another
        expectedRxDx1Dx2 = (rxDx1Count * dx2Count / totalCount)
        expectedRxDx2Dx1 = (rxDx2Count * dx1Count / totalCount)
        expectedDx1Dx2 = (dx1Count * dx2Count / totalCount)
        expected = array \
            ([   (expectedRxDx1Dx2),            (expectedDx1Dx2-expectedRxDx1Dx2),
                (rxCount-expectedRxDx1Dx2),    (totalCount-expectedDx1Dx2-rxCount+expectedRxDx1Dx2),
            ])

        (chi2ObsExp, pChi2ObsExp) = chisquare(observed, expected)

        dataCells = [
            rx, dx1, dx2, rxDx1Dx2Count, rxDx1Count, rxDx2Count, rxCount,
            dx1dx2Count, totalCount, expectedRxDx1Dx2, expectedRxDx2Dx1,
            expectedDx1Dx2, pChi2ObsExp
        ]
        for statId in statIds:
            try:
                dataCells.append(conStats[statId])
            except ZeroDivisionError:
                dataCells.append(None)
        for i, value in enumerate(dataCells):
            dataCells[i] = str(value)
            # String conversion to allow for concatenation below
        dataStr = str.join("\t", dataCells)
        print(dataStr)
        progress.update()
    progress.printStatus()