示例#1
0
    def _getAdmitDxPatientFrequencyRankByYear(self):
        # Get list of all clinical item IDs matching admit diagnosis.
        # Get this list in advance to make subsequent query run a bit faster.
        admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds()

        # Build query for # of unique patients.
        # SELECT
        #   ci.name AS icd_code,
        #   ci.description AS admit_dx,
        #   EXTRACT(YEAR FROM pi.item_date) AS admit_year,
        #   COUNT(DISTINCT pi.patient_id) AS num_unique_patients,
        # FROM
        #   patient_item AS pi
        # JOIN
        #   clinical_item AS ci
        # ON
        #   pi.clinical_item_id = ci.clinical_item_id
        # WHERE
        #   ci.clinical_item_id in (admitDxClinicalItemIds)
        # GROUP BY
        #   icd_code,
        #   admit_dx,
        #   admit_year
        #   num_unique_patients
        # ORDER BY
        #   admit_year,
        #   num_unique_patients DESC
        query = SQLQuery()
        query.addSelect("ci.name AS icd_code")
        query.addSelect("ci.description AS admit_dx")
        query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year")
        query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients")
        query.addFrom("patient_item AS pi")
        query.addJoin("clinical_item AS ci",
                      "pi.clinical_item_id = ci.clinical_item_id")
        query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds)
        query.addGroupBy("icd_code")
        query.addGroupBy("admit_dx")
        query.addGroupBy("admit_year")
        query.addGroupBy("num_unique_patients")
        query.addOrderBy("icd_code")
        query.addOrderBy("admit_year")
        query.addOrderBy("num_unique_patients DESC")

        # Execute query.
        results = DBUtil.execute(query)
示例#2
0
def add_sim_case_column_to(csv):
    query = SQLQuery()
    query.addSelect("sim_patient_id")
    query.addSelect("sim_case_name as sim_case")
    query.addFrom("sim_grading_key sgk")
    query.addJoin(
        "sim_patient_order spo",
        "sgk.clinical_item_id = spo.clinical_item_id and spo.sim_state_id = sgk.sim_state_id"
    )
    query.addGroupBy("sim_patient_id")
    query.addGroupBy("sim_case_name")
    case_names = DBUtil.execute(str(query))

    # merge sim_case column
    csv = pd.merge(csv,
                   pd.DataFrame(case_names,
                                columns=['sim_patient_id', 'sim_case']),
                   left_on='patient',
                   right_on='sim_patient_id')
    return csv
示例#3
0
    def test_draw(self):
        # Query events by clinical_item_category.
        # SELECT
        #     pi.patient_id AS sequence_id,
        #     pi.item_date AS event_time,
        #     cic.description AS event_id
        # FROM
        #     patient_item AS pi
        # JOIN
        #     clinical_item AS ci
        # ON
        #     pi.clinical_item_id = ci.clinical_item_id
        # JOIN
        #     clinical_item_category AS cic
        # ON
        #     ci.clinical_item_category_id = cic.clinical_item_category_id
        # ORDER BY
        #     sequence_id,
        #     event_time,
        #     event_id
        query = SQLQuery()
        query.addSelect('pi.patient_id AS sequence_id')
        query.addSelect('pi.item_date AS event_time')
        query.addSelect('cic.description AS event_id')
        query.addFrom('patient_item AS pi')
        query.addJoin('clinical_item AS ci',
                      'pi.clinical_item_id = ci.clinical_item_id')
        query.addJoin(
            'clinical_item_category AS cic',
            'ci.clinical_item_category_id = cic.clinical_item_category_id')
        query.addOrderBy('sequence_id')
        query.addOrderBy('event_time')
        query.addOrderBy('event_id')
        events = DBUtil.execute(query)

        # Build graph based on clinical_item_category.
        categoryDigraph = EventDigraph(events)
        categoryDigraphVizFileName = "test-category-digraph.png"
        categoryDigraph.draw(categoryDigraphVizFileName)
示例#4
0
# Find all clinical_item_ids associated with each patient_id
# SELECT * FROM clinical_item WHERE clinical_item_category_id = 161;

DATA_QUERY = SQLQuery()

# From patient_item or clinical_item
DATA_QUERY.addSelect("patient_id")
DATA_QUERY.addSelect("clinical_item_category_id")
DATA_QUERY.addSelect('name')
DATA_QUERY.addSelect("description")

# Join
DATA_QUERY.addFrom("patient_item")
DATA_QUERY.addJoin(
    "clinical_item",
    "patient_item.clinical_item_id = clinical_item.clinical_item_id",
    joinType="INNER")
DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description",
                         "Tt Med Univ (Primary)")  # Everyone
#DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Pamf Med (Primary)") # Expert

DATA_QUERY.addOrderBy("patient_id", dir="ASC")

print(DATA_QUERY)

# Write out data to CSV
DBUtil.runDBScript(SCRIPT_FILE, False)
results = DBUtil.execute(DATA_QUERY)

unique_patient_ids = {}
示例#5
0
    def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None):
        """Given the identifiers for a bunch of simulated physician-patient case
        records, and the identifier for a particular grading key to use,
        calculate what grade each case would get based on the choices made
        and return a dictionary of case grades (keyed by the case ID).
        """
        ext_conn = True
        if conn is None:
            conn = self.connFactory.connection()
            ext_conn = False
        try:
            # Inner query retrieves physician-patient cases with ranking group_names (to later select first)
            # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it
            # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading.
            inner_query = SQLQuery()
            inner_query.addSelect("score")
            inner_query.addSelect(
                "rank() over ("  # ranks rows incrementally in the same group
                "    partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id"
                "    order by sim_patient_order_id"
                ")")
            inner_query.addSelect("sim_user_id")
            inner_query.addSelect("sim_patient_id")
            inner_query.addSelect("sim_grader_id")
            inner_query.addFrom("sim_patient_order spo")
            inner_query.addJoin(
                "sim_grading_key sgk",
                "sgk.clinical_item_id = spo.clinical_item_id"
                "    and sgk.sim_state_id = spo.sim_state_id")
            inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id)
            inner_query.addWhereNotEqual(
                "spo.sim_user_id",
                0)  # 0 = ignore 'Default user', sets up initial cases
            inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids)
            inner_query.addOrderBy("relative_time_start")
            inner_query.addOrderBy("sim_patient_order_id")

            # Outer query sums the score per patient case and selects most graded physician for the case.
            # Theoretically, it isn't necessarily the most active physician for the case since his orders
            # might have been dropped by selecting only the first record within group_name group.
            query = SQLQuery()
            query.addSelect("sim_patient_id")
            query.addSelect("sim_grader_id")
            query.addSelect("sum(score) as total_score")
            query.addSelect(
                "mode() within group ("  # mode() selects most frequent value within group
                "    order by sim_user_id"
                ") as most_graded_user_id")
            query.addFrom("(" + str(inner_query) + ") as ranked_groups")
            query.addWhereEqual("ranked_groups.rank",
                                1)  # count only first order in the same group
            query.addGroupBy("sim_patient_id")
            query.addGroupBy("sim_grader_id")

            query_params = inner_query.getParams() + query.getParams()
            grades_table = DBUtil.execute(query,
                                          query_params,
                                          includeColumnNames=True,
                                          conn=conn)
            grades_model = modelListFromTable(grades_table)

            # get most active users for the cases
            most_active_user_query = SQLQuery()
            most_active_user_query.addSelect("sim_patient_id")
            most_active_user_query.addSelect("mode() within group ("
                                             "    order by sim_user_id"
                                             ") as most_active_user_id")
            most_active_user_query.addFrom("sim_patient_order")
            most_active_user_query.addWhereNotEqual("sim_user_id",
                                                    0)  # ignore Default user
            most_active_user_query.addWhereIn("sim_patient_id",
                                              sim_patient_ids)
            most_active_user_query.addGroupBy("sim_patient_id")
            most_active_user_query.addOrderBy("sim_patient_id")

            most_active_user_table = DBUtil.execute(most_active_user_query,
                                                    includeColumnNames=True,
                                                    conn=conn)
            most_active_user_model = modelListFromTable(most_active_user_table)
            # make a dict by sim_patient_id out of results - will be used for combining
            most_active_user_dict = {
                most_active_user["sim_patient_id"]: most_active_user
                for most_active_user in most_active_user_model
            }

            # combine results
            complete_grades = [
                grade.update(most_active_user_dict[grade["sim_patient_id"]])
                for grade in grades_model
            ]

            return complete_grades

        finally:
            if not ext_conn:
                conn.close()
示例#6
0
    def test_init(self):
        # Query events by clinical_item_category.
        # SELECT
        #     pi.patient_id AS sequence_id,
        #     pi.item_date AS event_time,
        #     cic.description AS event_id
        # FROM
        #     patient_item AS pi
        # JOIN
        #     clinical_item AS ci
        # ON
        #     pi.clinical_item_id = ci.clinical_item_id
        # JOIN
        #     clinical_item_category AS cic
        # ON
        #     ci.clinical_item_category_id = cic.clinical_item_category_id
        # ORDER BY
        #     sequence_id,
        #     event_time,
        #     event_id
        query = SQLQuery()
        query.addSelect('pi.patient_id AS sequence_id')
        query.addSelect('pi.item_date AS event_time')
        query.addSelect('cic.description AS event_id')
        query.addFrom('patient_item AS pi')
        query.addJoin('clinical_item AS ci',
                      'pi.clinical_item_id = ci.clinical_item_id')
        query.addJoin(
            'clinical_item_category AS cic',
            'ci.clinical_item_category_id = cic.clinical_item_category_id')
        query.addOrderBy('sequence_id')
        query.addOrderBy('event_time')
        query.addOrderBy('event_id')
        events = DBUtil.execute(query)

        # Build graph based on clinical_item_category.
        categoryDigraph = EventDigraph(events)
        # Sort for easier comparison against test data.
        actualCategoryNodes = sorted(categoryDigraph.nodes())
        actualCategoryEdges = sorted(categoryDigraph.edges())

        # Validate results.
        expectedCategoryNodes = ED_TEST_OUTPUT_TABLES['test_init'][
            'category_nodes']
        self.assertEqualList(actualCategoryNodes, expectedCategoryNodes)
        expectedCategoryEdges = ED_TEST_OUTPUT_TABLES['test_init'][
            'category_edges']
        self.assertEqualList(actualCategoryEdges, expectedCategoryEdges)

        # Query events by clinical_item.
        # SELECT
        #     pi.patient_id AS sequence_id,
        #     pi.item_date AS event_time,
        #     ci.description AS event_id
        # FROM
        #     patient_item AS pi
        # JOIN
        #     clinical_item AS ci
        # ON
        #     pi.clinical_item_id = ci.clinical_item_id
        # ORDER BY
        #     sequence_id,
        #     event_time,
        #     event_id
        query = SQLQuery()
        query.addSelect('pi.patient_id AS sequence_id')
        query.addSelect('pi.item_date AS event_time')
        query.addSelect('ci.description AS event_id')
        query.addFrom('patient_item AS pi')
        query.addJoin('clinical_item AS ci',
                      'pi.clinical_item_id = ci.clinical_item_id')
        query.addJoin(
            'clinical_item_category AS cic',
            'ci.clinical_item_category_id = cic.clinical_item_category_id')
        query.addOrderBy('sequence_id')
        query.addOrderBy('event_time')
        query.addOrderBy('event_id')
        events = DBUtil.execute(query)

        # Build graph based on clinical_item.
        itemDigraph = EventDigraph(events)
        # Sort for easier comparison against test data.
        actualItemNodes = sorted(itemDigraph.nodes())
        actualItemEdges = sorted(itemDigraph.edges())

        # Validate results.
        expectedItemNodes = ED_TEST_OUTPUT_TABLES['test_init']['item_nodes']
        self.assertEqualList(actualItemNodes, expectedItemNodes)
        expectedItemEdges = ED_TEST_OUTPUT_TABLES['test_init']['item_edges']
        self.assertEqualList(actualItemEdges, expectedItemEdges)