Example No. 1
    def clinicalItemFromSourceItem(self, sourceItem, category, conn):
        # Load or produce a clinical_item record model for the given sourceItem
        # ('suseptibility' is spelled as it appears in the source data columns)
        sourceItem_description = "%s:%s" % (sourceItem['antibiotic_name'], sourceItem['suseptibility'])
        clinicalItemKey = (category["clinical_item_category_id"], sourceItem_description);  # TODO: This composite key probably needs to change
        if clinicalItemKey not in self.clinicalItemByCompositeKey:
            # Clinical Item does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            if sourceItem['antibiotic_name'] is None and sourceItem['suseptibility'] is None:  # We only get here if no bacteria grew
                name = "Negative Culture"
                description = "Microculture Grew No Bacteria"
            else:
                name = "%s:%s" % (sourceItem['antibiotic_name'], sourceItem['suseptibility'])
                description = "%s TO %s" % (sourceItem['suseptibility'], sourceItem['antibiotic_name'])


            clinicalItem = \
                RowItemModel \
                (   {   "clinical_item_category_id": category["clinical_item_category_id"],
                        "external_id": None,
                        "name": name,
                        "description": description
                    }
                );
            (clinicalItemId, isNew) = DBUtil.findOrInsertItem("clinical_item", clinicalItem, conn=conn);
            clinicalItem["clinical_item_id"] = clinicalItemId;
            self.clinicalItemByCompositeKey[clinicalItemKey] = clinicalItem;
        return self.clinicalItemByCompositeKey[clinicalItemKey];
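A minimal usage sketch for the method above (the converter instance, the literal values, and the category row are hypothetical; connFactory follows the constructors shown in later examples):

    # Hypothetical source row shaped like the stride_culture_micro data in Example No. 24
    sourceItem = {"antibiotic_name": "Penicillin", "suseptibility": "Susceptible"}
    category = {"clinical_item_category_id": 15}
    conn = converter.connFactory.connection()
    clinicalItem = converter.clinicalItemFromSourceItem(sourceItem, category, conn)
    print(clinicalItem["clinical_item_id"])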
Example No. 2
    def __init__(self):
        """Default constructor"""
        self.connFactory = DBUtil.ConnectionFactory()
        # Default connection source

        self.categoryBySourceDescr = dict()
        self.clinicalItemByCategoryIdExtId = dict()
Example No. 3
    def _executeCachedDBQuery(self, query):
        # Only query if results not in cache.
        cacheKey = str(query)
        if cacheKey not in self.dbCache:
            self.dbCache[cacheKey] = DBUtil.execute(query)

        return self.dbCache[cacheKey]
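Repeated calls with an equivalent query then skip the database; a brief usage sketch (the analyzer instance is hypothetical, and self.dbCache is assumed to be a dict initialized in the constructor):

    rows = analyzer._executeCachedDBQuery(query)        # Executes against the database
    rowsAgain = analyzer._executeCachedDBQuery(query)   # Served from self.dbCache
    assert rows is rowsAgain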
Example No. 4
def queryPatients(period, locations, rxCount):
    log.info("Select patients fitting criteria in designated time period: (%s,%s)" % period);

    query = SQLQuery();
    query.addSelect("med.pat_id");
    query.addSelect("count(order_med_id)");
    query.addFrom("stride_mapped_meds as map");
    query.addFrom("stride_order_med as med");
    query.addFrom("stride_patient as pat");
    query.addWhere("analysis_status = 1");
    query.addWhere("map.medication_id = med.medication_id");
    query.addWhere("med.pat_id = pat.pat_id");
    query.addWhere("possible_oncology = 0");
    query.addWhereIn("patient_location", locations );
    query.addWhereOp("ordering_datetime",">", period[0] );
    query.addWhereOp("ordering_datetime","<", period[-1] );
    query.addGroupBy("med.pat_id");
    query.addHaving("count(order_med_id) >2");
    
    results = DBUtil.execute(query);
    cols = ["patientId","nOpioidRx"];
    patientDF = pd.DataFrame(results,columns=cols);
    #patientDF.set_index("patientId",drop=False,inplace=True);

    patientDF["periodStart"] = period[0];   # Identify this group of patient records

    return patientDF;
Example No. 5
    def _get_average_orders_per_patient(self):
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Get the number of orders for this lab test per patient (the median is returned as a robust average).
        query = SQLQuery()
        if LocalEnv.DATASET_SOURCE_NAME == 'STRIDE':  #TODO: add STRIDE component routine
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(sop.order_proc_id) AS num_orders')
            query.addFrom('stride_order_proc AS sop')
            query.addFrom('stride_order_results AS sor')
            query.addWhere('sop.order_proc_id = sor.order_proc_id')
            query.addWhereIn("proc_code", [self._lab_panel])
            components = self._get_components_in_lab_panel()
            query.addWhereIn("base_name", components)
            query.addGroupBy('pat_id')

        elif LocalEnv.DATASET_SOURCE_NAME == 'UMich':
            query.addSelect('CAST(pat_id AS BIGINT) AS pat_id')
            query.addSelect('COUNT(order_proc_id) AS num_orders')
            query.addFrom('labs')
            query.addWhereIn(self._varTypeInTable, [self._lab_var])
            components = self._get_components_in_lab_panel()
            query.addWhereIn("base_name", components)
            query.addGroupBy('pat_id')
        log.debug('Querying median orders per patient...')
        results = DBUtil.execute(query)
        order_counts = [row[1] for row in results]
        if len(order_counts) == 0:
            error_msg = '0 orders for lab "%s".' % self._lab_var
            log.critical(error_msg)
            raise Exception(error_msg)
            # sxu: sys.exit cannot be caught as an Exception, so raise instead
        else:
            return numpy.median(order_counts)
Example No. 6
def loadRespiratoryViralPanelItemIds(extractor):
    # labCategoryId = 6;
    labCategoryId = DBUtil.execute(
        "select clinical_item_category_id from clinical_item_category where description like 'Lab'"
    )[0][0]

    query = SQLQuery()
    query.addSelect("clinical_item_id")
    query.addFrom("clinical_item")
    query.addWhereEqual("analysis_status", 1)
    query.addWhereIn("clinical_item_category_id", (labCategoryId, ))
    query.addWhere("description ~* '%s'" % 'Respiratory.*Panel')
    respiratoryViralPanelItemIds = set()
    for row in DBUtil.execute(query):
        respiratoryViralPanelItemIds.add(row[0])
    return respiratoryViralPanelItemIds
Example No. 7
    def _insertUMichTestRecords(self):
        db_name = medinfo.db.Env.DB_PARAM['DSN']
        db_path = medinfo.db.Env.DB_PARAM['DATAPATH']
        conn = sqlite3.connect(db_path + '/' + db_name)

        table_names = ['labs', 'pt_info', 'demographics', 'encounters', 'diagnoses']

        for table_name in table_names:
            columns = FMTU.FM_TEST_INPUT_TABLES["%s_columns"%table_name]
            column_types = FMTU.FM_TEST_INPUT_TABLES["%s_column_types"%table_name]

            df = pd.DataFrame()
            for one_line in FMTU.FM_TEST_INPUT_TABLES['%s_data'%table_name]:
                df = df.append(dict(zip(columns, one_line)), ignore_index=True)

            df.to_sql(table_name, conn, if_exists="append", index=False)

        # First, write basic (pat_id, order_time) episode information to TempFile
        # Then, all [[pat_id, event_time]] operations are based on these episodes
        # i.e., pat_id are all from these pat_ids

        patientEpisodeQuery = SQLQuery()
        patientEpisodeQuery.addSelect("CAST(pat_id AS INTEGER) AS pat_id")
        patientEpisodeQuery.addSelect("order_time")
        patientEpisodeQuery.addFrom("labs")
        self.cursor.execute(str(patientEpisodeQuery), patientEpisodeQuery.params)

        self.factory.setPatientEpisodeInput(self.cursor, "pat_id", "order_time")
        self.factory.processPatientEpisodeInput()
        resultEpisodeIterator = self.factory.getPatientEpisodeIterator()
        resultPatientEpisodes = list()
        for episode in resultEpisodeIterator:
            episode["pat_id"] = int(episode["pat_id"])
            episode["order_time"] = DBUtil.parseDateValue(episode["order_time"])
            resultPatientEpisodes.append(episode)
Example No. 8
def queryClinicalItems(outputFile, clinicalItemIds, patientById):
    log.info("Query Clinical Items: %s" % str(clinicalItemIds))
    formatter = TextResultsFormatter(outputFile)

    colNames = ["patient_id", "item_date"]

    query = SQLQuery()
    for col in colNames:
        query.addSelect(col)
    query.addFrom("patient_item")
    query.addWhereIn("clinical_item_id", clinicalItemIds)
    query.addWhereIn("patient_id", patientById.keys())
    query.addOrderBy("patient_id")
    query.addOrderBy("item_date")

    DBUtil.execute(query, includeColumnNames=True, formatter=formatter)
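A brief usage sketch (the item IDs are hypothetical, and patientById is assumed to map patient_id to patient records as in the surrounding extraction code):

    with open("patient_items.txt", "w") as outputFile:
        queryClinicalItems(outputFile, [1234, 5678], patientById)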
Example No. 9
    def itemCollectionFromSourceItem(self, sourceItem, conn):
        # Load or produce an item_collection record model for the given sourceItem
        if sourceItem["protocol_id"] is None:
            # No order set link to this item, so nothing to return
            return None

        collectionKey = "%(protocol_id)s-%(protocol_name)s-%(section_name)s-%(smart_group)s" % sourceItem
        self.itemCollectionByKeyStr = dict()
        # Local cache to track item collections
        if collectionKey not in self.itemCollectionByKeyStr:
            # Collection does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            collection = \
                RowItemModel \
                (   {   "external_id":  sourceItem["protocol_id"],
                        "name":  sourceItem["protocol_name"],
                        "section":  sourceItem["section_name"],
                        "subgroup":  sourceItem["smart_group"],
                    }
                )
            (collectionId, isNew) = DBUtil.findOrInsertItem("item_collection",
                                                            collection,
                                                            conn=conn)
            collection["item_collection_id"] = collectionId
            self.itemCollectionByKeyStr[collectionKey] = collection
        return self.itemCollectionByKeyStr[collectionKey]
Example No. 10
    def clinicalItemFromSourceItem(self, sourceItem, category, conn):
        # Load or produce a clinical_item record model for the given sourceItem
        clinicalItemKey = (category["clinical_item_category_id"],
                           sourceItem["proc_code"])
        # This should be what determines a new unique clinical_item.
        #   Some debate about whether to distinguish by proc_id or proc_code, but there are many labs and other procs
        #   that use different proc_ids even though they are obviously the same. Look in STRIDE_ORDER_PROC for examples like LABA1C.
        # The self.clinicalItemByCategoryIdExtId dict is supposed to keep track of which clinical_items we're already aware of,
        #   but note that it starts blank when this module runs.
        #   So in theory you should only run this conversion process on a database once
        #   (otherwise it will not be aware that a bunch of duplicate clinical_items already exist in the database).
        #   Alternatively, this module should be updated so that it initializes this key tracker with whatever is already in the database.
        if clinicalItemKey not in self.clinicalItemByCategoryIdExtId:
            # Clinical Item does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            clinicalItem = \
                RowItemModel \
                (   {   "clinical_item_category_id": category["clinical_item_category_id"],
                        "external_id": sourceItem["proc_id"],
                        "name": sourceItem["proc_code"],
                        "description": sourceItem["description"],
                    }
                )
            (clinicalItemId, isNew) = DBUtil.findOrInsertItem("clinical_item",
                                                              clinicalItem,
                                                              conn=conn)
            clinicalItem["clinical_item_id"] = clinicalItemId
            self.clinicalItemByCategoryIdExtId[clinicalItemKey] = clinicalItem
        return self.clinicalItemByCategoryIdExtId[clinicalItemKey]
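The last point in the comment above suggests pre-loading the key tracker; a minimal sketch of such an initializer, assuming the SQLQuery / DBUtil / RowItemModel helpers used throughout these examples (the method name is hypothetical):

    def initializeClinicalItemCacheFromDB(self, conn):
        # Pre-populate the composite-key tracker from existing clinical_item rows
        # so that repeat conversion runs reuse records instead of duplicating them
        query = SQLQuery()
        query.addSelect("clinical_item_id")
        query.addSelect("clinical_item_category_id")
        query.addSelect("external_id")
        query.addSelect("name")
        query.addSelect("description")
        query.addFrom("clinical_item")
        for (itemId, categoryId, externalId, name, description) in DBUtil.execute(query, conn=conn):
            clinicalItem = RowItemModel(
                {   "clinical_item_id": itemId,
                    "clinical_item_category_id": categoryId,
                    "external_id": externalId,
                    "name": name,
                    "description": description,
                }
            )
            self.clinicalItemByCategoryIdExtId[(categoryId, name)] = clinicalItem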
Example No. 11
	def _getNonNullBaseNames(self):
		query = SQLQuery()
		# SELECT
		query.addSelect(BASE_NAME)
		query.addSelect('max_result_flag')
		# FROM
		query.addFrom('order_result_stat')
		# WHERE
		query.addWhere('max_result_flag is not null')

		print(query)
		print(query.getParams())
		DBUtil.runDBScript(self.SCRIPT_FILE, False)
		results = DBUtil.execute(str(query), query.getParams())

		pd.DataFrame(results, columns=query.select).to_csv('base_names.csv', index=False)
Example No. 12
    def _test_dataConversion_aggregate(self):
        # Run the data conversion on the same data and look for expected records
        log.debug("Run the conversion process...")
        convOptions = ConversionOptions()
        convOptions.startDate = TEST_START_DATE
        convOptions.aggregate = True
        self.converter.convertSourceItems(convOptions)

        # Just query back for the same data, de-normalizing the data back to a general table
        testQuery = \
            """
            select 
                pi.external_id as pi_external_id,
                pi.patient_id,
                pi.encounter_id,
                cic.description as cic_description,
                ci.external_id as ci_external_id,
                ci.name,
                ci.description as ci_description,
                pi.item_date
            from
                %s.patient_item as pi,
                %s.clinical_item as ci,
                %s.clinical_item_category as cic
            where
                pi.clinical_item_id = ci.clinical_item_id and
                ci.clinical_item_category_id = cic.clinical_item_category_id and
                cic.source_table = '%s'
            order by
                pi.external_id desc, ci.external_id desc
            """ % (TEST_DEST_DATASET, TEST_DEST_DATASET, TEST_DEST_DATASET, TEST_SOURCE_TABLE)
        expectedData = []
        actualData = DBUtil.execute(testQuery)
        self.assertEqualTable(expectedData, actualData)
Example No. 13
    def itemCollectionFromSourceItem(self, sourceItem, conn):
        # Load or produce an item_collection record model for the given sourceItem
        if sourceItem["protocol_id"] is None:
            # No order set link to this item, so nothing to return
            return None

        key = {
            "protocol_id": sourceItem["protocol_id"],
            "ss_section_id": sourceItem["ss_section_id"],
            "ss_sg_key": (sourceItem["ss_sg_key"].strip().upper()
                          if sourceItem["ss_sg_key"] is not None else None)
        }

        collection_key = "%(protocol_id)d-%(ss_section_id)s-%(ss_sg_key)s" % key
        if collection_key not in self.itemCollectionByKeyStr:
            # Collection does not yet exist in the local cache.  Check if in database table (if not, persist a new record)
            collection = RowItemModel({
                "external_id": sourceItem["protocol_id"],
                "name": sourceItem["protocol_name"],
                "section": sourceItem["ss_section_name"],
                "subgroup": sourceItem["ss_sg_name"],
            })
            (collectionId, isNew) = DBUtil.findOrInsertItem("item_collection",
                                                            collection,
                                                            conn=conn)
            collection["item_collection_id"] = collectionId
            self.itemCollectionByKeyStr[collection_key] = collection
        return self.itemCollectionByKeyStr[collection_key]
Example No. 14
    def action_default(self):
        # Read checkboxes by presence or absence of field
        # Checkboxes are not passed if unchecked, so take an extra step to ensure an uncheck is persisted
        self.requestData["incCols"] = ""
        incCols = False
        if "incCols" in self.mForm:
            self.requestData["incCols"] = self.mForm["incCols"].value
            incCols = True

        # Point to the specified database
        connFactory = self.connectionFactory()

        timer = time.time()
        # Just execute a normal query, possibly with a result set
        results = DBUtil.execute(self.mForm["input"].value,
                                 includeColumnNames=incCols,
                                 connFactory=connFactory)
        if isinstance(results, list):  # Result set, format as table
            formatter = TextResultsFormatter(StringIO())
            formatter.formatResultSet(results)
            self.requestData["resultsText"] = formatter.getOutFile().getvalue()

            headerRowFormat = None
            if incCols: headerRowFormat = "th"

            formatter = HtmlResultsFormatter(StringIO(), headerRowFormat)
            formatter.formatResultSet(results)
            self.requestData["resultsHtml"] = formatter.getOutFile().getvalue()

            self.requestData["resultsInfo"] = "(%d rows) " % len(results)
        else:
            self.requestData["resultsText"] = "%d rows affected (or other return code)" % results
        timer = time.time() - timer
        self.requestData["resultsInfo"] += "(%1.3f seconds)" % timer
Example No. 15
    def _getClinicalItems(self):
        query = SQLQuery()
        # SELECT
        query.addSelect(CLINICAL_ITEM_ID)
        query.addSelect('name')
        # FROM
        query.addFrom('clinical_item')

        print(query)
        print(query.getParams())
        DBUtil.runDBScript(self.SCRIPT_FILE, False)
        results = DBUtil.execute(str(query), query.getParams())

        pd.DataFrame(results, columns=query.select).to_csv('clinical_items.csv', index=False)
Example No. 16
    def setCacheData(self, key, value, conn=None):
        """Utility function to set cached data item in data_cache table"""
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()

        # Clear any prior setting to make way for the new one
        self.clearCacheData(key, conn=conn)

        insertQuery = DBUtil.buildInsertQuery(
            "data_cache", ("data_key", "data_value", "last_update"))
        insertParams = (key, str(value), datetime.now())
        DBUtil.execute(insertQuery, insertParams, conn=conn)

        if not extConn:
            conn.close()
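The corresponding read path is not shown here; a minimal sketch, assuming the same data_cache table, connection-handling convention, and query helpers (getCacheData itself is hypothetical):

    def getCacheData(self, key, conn=None):
        """Utility function to read back a cached data item from the data_cache table"""
        extConn = conn is not None
        if not extConn:
            conn = self.connFactory.connection()
        try:
            query = SQLQuery()
            query.addSelect("data_value")
            query.addFrom("data_cache")
            query.addWhereEqual("data_key", key)
            results = DBUtil.execute(query, conn=conn)
            return results[0][0] if results else None
        finally:
            if not extConn:
                conn.close()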
Example No. 17
    def loadClinicalItemBaseCountByItemId(self, countPrefix=None, acceptCache=True, conn=None):
        """Helper query to get the baseline analyzed item counts for all of the clinical items
        If countPrefix is provided, can use alternative total item counts instead of the default item_count,
        such as patient_count or encounter_count to match the respective association query baselines used.
        """
        extConn = True;
        if conn is None:
            conn = self.connFactory.connection();
            extConn = False;
        try:
            # First ensure the denormalized count data is updated
            self.updateClinicalItemCounts(acceptCache=acceptCache, conn=conn);

            if countPrefix is None or countPrefix == "":
                countPrefix = "item_";  # Default to general item counts, allowing for repeats per patient

            baseCountQuery = SQLQuery();
            baseCountQuery.addSelect("clinical_item_id");
            baseCountQuery.addSelect("%scount" % countPrefix);
            baseCountQuery.addFrom("clinical_item");
            if acceptCache:
                baseCountResultTable = self.executeCacheOption( baseCountQuery, conn=conn );
            else:
                baseCountResultTable = DBUtil.execute( baseCountQuery, conn=conn );

            baseCountByItemId = dict();
            for (itemId, baseCount) in baseCountResultTable:
                baseCountByItemId[itemId] = baseCount;
            return baseCountByItemId;

        finally:
            if not extConn:
                conn.close();
Example No. 18
def parseLabResultsFile(labFile):
    log.info("Parse lab results file")
    prog = ProgressDots()
    labsByBaseNameByPatientId = dict()
    # Dictionary of dictionaries of lists of result items
    for labResult in TabDictReader(labFile):
        if labResult["ord_num_value"] is not None and labResult[
                "ord_num_value"] != NULL_STRING:
            patientId = int(labResult["pat_id"])
            labBaseName = labResult["base_name"]
            resultValue = float(labResult["ord_num_value"])
            resultTime = DBUtil.parseDateValue(labResult["result_time"])

            if resultValue < LAB_SENTINEL_VALUE:  # Skip apparent placeholder values
                labResult["pat_id"] = labResult["patient_id"] = patientId
                labResult["ord_num_value"] = resultValue
                labResult["result_time"] = resultTime

                if patientId not in labsByBaseNameByPatientId:
                    labsByBaseNameByPatientId[patientId] = dict()
                if labBaseName not in labsByBaseNameByPatientId[patientId]:
                    labsByBaseNameByPatientId[patientId][labBaseName] = list()
                labsByBaseNameByPatientId[patientId][labBaseName].append(labResult)

        prog.update()
    prog.printStatus()
    return labsByBaseNameByPatientId
Example No. 19
    def _get_random_patient_list(self):
        # Initialize DB cursor.
        cursor = self._connection.cursor()

        # Get the average number of orders for this lab test per patient.
        avg_orders_per_patient = self._get_average_orders_per_patient()
        log.info('avg_orders_per_patient: %s' % avg_orders_per_patient)
        # Based on average # of results, figure out how many patients we'd
        # need to get for a feature matrix of requested size.
        self._num_patients = int(numpy.max(
            [self._num_requested_episodes / avg_orders_per_patient, 1]))

        # Get numPatientsToQuery random patients who have gotten test.
        # TODO(sbala): Have option to feed in a seed for the randomness.
        query = SQLQuery()
        query.addSelect('pat_id')
        query.addFrom('stride_order_proc AS sop')
        query.addWhereIn('proc_code', [self._lab_panel])
        query.addOrderBy('RANDOM()')
        query.setLimit(self._num_patients)
        log.debug('Querying random patient list...')
        results = DBUtil.execute(query)

        # Get patient list.
        random_patient_list = [row[0] for row in results]

        return random_patient_list
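For example, requesting 10,000 episodes when patients average 4 orders each yields int(max(10000 / 4, 1)) = 2,500 random patients to query.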
Example No. 20
    def __init__(self):
        """Default constructor"""
        self.connFactory = DBUtil.ConnectionFactory();  # Default connection source

        self.categoryBySourceDescr = dict();
        self.clinicalItemByCategoryIdExtId = dict();
        self.resultStatsByBaseName = None;
Example No. 21
    def load_stride_to_psql():
        # Build clean data files.
        StrideLoader.build_clean_csv_files()

        # Build psql schemata.
        StrideLoader.build_stride_psql_schemata()

        # Build paths to clean data files.
        clean_data_dir = StrideLoader.fetch_clean_data_dir()
        for raw_file in sorted(STRIDE_LOADER_PARAMS.keys()):
            params = STRIDE_LOADER_PARAMS[raw_file]

            # Build clean data file.
            clean_file = params['clean_file'] % TABLE_PREFIX
            log.info('loading %s...' % clean_file)
            clean_path = os.path.join(clean_data_dir, clean_file)

            # Uncompress data file.
            unzipped_clean_path = clean_path[:-3]
            with gzip.open(clean_path, 'rb') as f_in, \
                    open(unzipped_clean_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

            # psql COPY data from clean files into DB.
            psql_table = params['psql_table'] % TABLE_PREFIX
            log.debug('stride/data/clean/%s ==> %s' % (clean_file, psql_table))
            # In some cases, two files going to the same table will have
            # non-identical column names. Pass these explicitly so that
            # psql knows which columns to try to fill from file.
            # Strip the newline character.
            with open(unzipped_clean_path, 'r') as f_in:
                columns = f_in.readline()[:-1]
            command = "COPY %s (%s) FROM '%s' WITH (FORMAT csv, HEADER);" % (
                psql_table, columns, unzipped_clean_path)
            DBUtil.execute(command)

            # Delete unzipped_clean_path.
            os.remove(unzipped_clean_path)

        # Run any one-off postprocessing transformations which all users
        # of the STRIDE database should receive. Defer any application-specific
        # transformations to other modules.
        StrideLoader.process_stride_psql_db()

        # Build indices.
        StrideLoader.build_stride_psql_indices()
Example No. 22
    def tearDown(self):
        """Restore state from any setUp or test steps"""
        log.info("Purge test records from the database")

        DBUtil.execute \
        (   """delete from patient_item_collection_link
            where item_collection_item_id in 
            (   select item_collection_item_id
                from item_collection_item as ici, item_collection as ic
                where ici.item_collection_id = ic.item_collection_id
                and ic.external_id < 0
            );
            """
        )
        DBUtil.execute \
        (   """delete from item_collection_item
            where item_collection_id in 
            (   select item_collection_id
                from item_collection as ic
                where ic.external_id < 0
            );
            """
        )
        DBUtil.execute("delete from item_collection where external_id < 0;")


        DBUtil.execute \
        (   """delete from patient_item 
            where clinical_item_id in 
            (   select clinical_item_id
                from clinical_item as ci, clinical_item_category as cic
                where ci.clinical_item_category_id = cic.clinical_item_category_id
                and cic.source_table = 'stride_order_proc'
            );
            """
        )
        DBUtil.execute \
        (   """delete from clinical_item 
            where clinical_item_category_id in 
            (   select clinical_item_category_id 
                from clinical_item_category 
                where source_table = 'stride_order_proc'
            );
            """
        )
        DBUtil.execute(
            "delete from clinical_item_category where source_table = 'stride_order_proc';"
        )

        DBUtil.execute(
            "delete from stride_orderset_order_proc where order_proc_id in (%s)"
            % str.join(",", self.orderProcIdStrList))
        DBUtil.execute(
            "delete from stride_order_proc where order_proc_id in (%s)" %
            str.join(",", self.orderProcIdStrList))

        DBTestCase.tearDown(self)
Example No. 23
    def prepareItemAssociations(self, itemIdPairs, linkedItemIdsByBaseId,
                                conn):
        """Make sure all pair-wise item association records are ready / initialized
        so that subsequent queries don't have to pause to check for their existence.
        Should help greatly to reduce number of queries and execution time.
        """
        clinicalItemIdSet = set()
        # Convert the list of ID-pair strings into a list of tuples, as the rest of this function expects
        for index, pair in enumerate(itemIdPairs):
            itemIdPairs[index] = eval(pair)  # Note: ast.literal_eval would be safer for untrusted input

        for (itemId1, itemId2) in itemIdPairs:
            clinicalItemIdSet.add(itemId1)
            clinicalItemIdSet.add(itemId2)
        nItems = len(clinicalItemIdSet)

        # Now go through all needed item pairs and create default records as needed
        log.debug("Ensure %d baseline records ready" % (nItems * nItems))
        for itemId1 in clinicalItemIdSet:
            # Query to see which ones already exist in the database
            # Do this for each source clinical item instead of all combinations to avoid excessive in memory tracking
            query = SQLQuery()
            query.addSelect("clinical_item_id")
            query.addSelect("subsequent_item_id")
            query.addFrom("clinical_item_association")
            query.addWhereEqual("clinical_item_id", itemId1)
            query.addWhereIn("subsequent_item_id", clinicalItemIdSet)
            associationTable = DBUtil.execute(query, conn=conn)

            # Keep track in memory temporarily for rapid lookup
            existingItemIdPairs = set()
            for row in associationTable:
                existingItemIdPairs.add(tuple(row))

            for itemId2 in clinicalItemIdSet:
                itemIdPair = (itemId1, itemId2)
                if itemIdPair not in existingItemIdPairs and self.acceptableClinicalItemIdPair(
                        itemId1, itemId2, linkedItemIdsByBaseId):
                    defaultAssociation = RowItemModel(
                        itemIdPair, ("clinical_item_id", "subsequent_item_id"))
                    try:  # Optimistic insert of a new item pair, should be safe since just checked above, but parallel processes may collide
                        DBUtil.insertRow("clinical_item_association",
                                         defaultAssociation,
                                         conn=conn)
                    except conn.IntegrityError as err:
                        log.warning(err)
Example No. 24
    def querySourceItems(self, convOptions, progress=None, conn=None):
        """Query the database for list of all source clinical items (culture results, etc.)
        and yield the results one at a time.  If startDate provided, only return items whose
        occurence date is on or after that date.
        """
        extConn = conn is not None;
        if not extConn:
            conn = self.connFactory.connection();

        # Column headers to query for that map to respective fields in analysis table
        headers = ["order_proc_anon_id","pat_anon_id","pat_enc_csn_anon_id","proc_code","organism_name","antibiotic_name","suseptibility", "shifted_result_time"];

        query = SQLQuery();
        for header in headers:
            query.addSelect( header );
        query.addFrom("stride_culture_micro");
        # TODO: Figure out why the >= operation doesn't work here
        # if convOptions.startDate is not None:
        #     query.addWhereOp("shifted_result_time",">=", convOptions.startDate);
        # if convOptions.endDate is not None:
        #     query.addWhereOp("shifted_result_time","<", convOptions.endDate);  # Still use begin date as common filter value

        # Query to get an estimate of how long the process will be
        if progress is not None:
            progress.total = DBUtil.execute(query.totalQuery(), conn=conn)[0][0];

        cursor = conn.cursor();
        # Do one massive query, but yield data for one item at a time.
        cursor.execute( str(query), tuple(query.params) );

        row = cursor.fetchone();
        while row is not None:
            rowModel = RowItemModel( row, headers );

            if rowModel['shifted_result_time'] is None: # Don't add if no result time given
                row = cursor.fetchone();
                continue

            if rowModel['organism_name'] is not None: # if positive culture but results uninterpretable, don't add feature
                if rowModel['suseptibility'] is None or rowModel['antibiotic_name'] == 'Method' or rowModel['antibiotic_name'] is None:
                    row = cursor.fetchone();
                    continue

            # So that we don't run into directory issues later when writing temp files
            try:
                rowModel['antibiotic_name'] = rowModel['antibiotic_name'].replace('/', '-')
            except AttributeError:  # When antibiotic name is None
                pass

            yield rowModel; # Yield one row worth of data at a time to avoid having to keep the whole result set in memory
            row = cursor.fetchone();

        # Slight risk here.  Normally DB connection closing should be in finally of a try block,
        #   but using the "yield" generator construct forbids us from using a try, finally construct.
        cursor.close();

        if not extConn:
            conn.close();
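A brief usage sketch of the generator above (the converter instance and convOptions are assumed as in the other conversion examples):

    progress = ProgressDots()
    for rowModel in converter.querySourceItems(convOptions, progress=progress):
        # One stride_culture_micro row at a time; nothing beyond the current row is held in memory
        print(rowModel["pat_anon_id"], rowModel["antibiotic_name"], rowModel["suseptibility"])
    progress.printStatus()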
Example No. 25
    def loadPatientOrders(self,
                          patientId,
                          currentTime,
                          loadActive=True,
                          conn=None):
        """Load orders for the given patient that exist by the specified current time point.
        loadActive - Specify whether to load active vs. inactive/completed orders.  Set to None to load both
        """
        extConn = True
        if conn is None:
            conn = self.connFactory.connection()
            extConn = False
        try:
            query = SQLQuery()
            query.addSelect("po.sim_patient_order_id")
            query.addSelect("po.sim_user_id")
            query.addSelect("po.sim_patient_id")
            query.addSelect("po.sim_state_id")
            query.addSelect("po.clinical_item_id")
            query.addSelect("po.relative_time_start")
            query.addSelect("po.relative_time_end")
            query.addSelect("ci.name")
            query.addSelect("ci.description")
            query.addSelect("cic.source_table")
            query.addSelect("cic.description as category_description")
            query.addFrom("sim_patient_order as po")
            query.addFrom("clinical_item as ci")
            query.addFrom("clinical_item_category as cic")
            query.addWhere("po.clinical_item_id = ci.clinical_item_id")
            query.addWhere(
                "ci.clinical_item_category_id = cic.clinical_item_category_id")
            query.addWhereEqual("sim_patient_id", patientId)
            query.addWhereOp("relative_time_start", "<=", currentTime)

            if loadActive:  # Filter out inactive orders here.
                query.openWhereOrClause()
                query.addWhere("relative_time_end is null")
                query.addWhereOp("relative_time_end", ">", currentTime)
                query.closeWhereOrClause()
            #elif loadActive is not None:    # Filter out active orders here.
            #    query.addWhereOp("relative_time_end","<=", currentTime)

            if loadActive:  # Organize currently active orders by category
                query.addOrderBy("cic.description")
                query.addOrderBy("ci.description")
                query.addOrderBy("relative_time_start")
            else:  # Otherwise chronologic order
                query.addOrderBy("relative_time_start")
                query.addOrderBy("cic.description")
                query.addOrderBy("ci.description")

            dataTable = DBUtil.execute(query,
                                       includeColumnNames=True,
                                       conn=conn)
            dataModels = modelListFromTable(dataTable)
            return dataModels
        finally:
            if not extConn:
                conn.close()
Example No. 26
    def removeItemCollectionTablesAddedLines(self, source_table):
        """delete added records"""
        log.info(
            'Removing item_collection_item and item_collection added lines in PSQL DB'
        )

        DBUtil.execute("""delete from item_collection_item ici
               using clinical_item ci, clinical_item_category cic
               where ici.clinical_item_id = ci.clinical_item_id
                 and ci.clinical_item_category_id = cic.clinical_item_category_id
                 and cic.source_table = '{}';
                 """.format(source_table),
                       conn=self.pgConn)

        # TODO should be using source_table also
        DBUtil.execute("delete from item_collection where true;",
                       conn=self.pgConn)
Example No. 27
    def test_runDBScript_commandline(self):
        # Equivalent to test_runDBScript, but try higher level interface
        #   through command-line "main" method
        origStdin = sys.stdin
        sys.stdin = self.SCRIPT_FILE
        argv = ["DBUtil.py", "--script", "-"]
        DBUtil.main(argv)
        sys.stdin = origStdin

        # Run some other commands to see if scripts produced expected results
        results = DBUtil.execute(
            "select * from TestTypes where MyInteger > %s", (200, ))
        self.assertEqual(2, len(results))

        results = DBUtil.execute(
            "select * from TestTypes where MyInteger < %s", (100, ))
        self.assertEqual(0, len(results))
Example No. 28
    def __init__(self):
        """Default constructor"""
        self.connFactory = DBUtil.ConnectionFactory();  # Default connection source, but Allow specification of alternative DB connection source

        self.categoryBySourceDescr = dict();    # Local cache to track the clinical item category table contents
        self.clinicalItemByCategoryIdExtId = dict(); # Local cache to track clinical item table contents
        self.itemCollectionByKeyStr = dict();   # Local cache to track item collections
        self.itemCollectionItemByCollectionIdItemId = dict();   # Local cache to track item collection items
Example No. 29
def loadBloodCultureItemIds(extractor):
    # microCategoryId = 15;
    microCategoryId = DBUtil.execute(
        "select clinical_item_category_id from clinical_item_category where description like 'Microbiology'"
    )[0][0]

    # Look for diagnostic tests indicating suspected infection / sepsis
    query = SQLQuery()
    query.addSelect("clinical_item_id")
    query.addFrom("clinical_item")
    query.addWhereEqual("analysis_status", 1)
    query.addWhereIn("clinical_item_category_id", (microCategoryId, ))
    query.addWhere("description ~* '%s'" % 'Blood Culture')
    bloodCultureItemIds = set()
    for row in DBUtil.execute(query):
        bloodCultureItemIds.add(row[0])
    return bloodCultureItemIds
Example No. 30
    def __init__(self):
        """Default constructor"""
        self.connFactory = DBUtil.ConnectionFactory()
        # Default connection source, but Allow specification of alternative DB connection source

        self.categoryBySourceDescr = dict()
        # Local cache to track the clinical item category table contents
        self.clinicalItemByCompositeKey = dict()