def _getAdmitDxPatientFrequencyRankByYear(self): # Get list of all clinical item IDs matching admit diagnosis. # Get this list in advance to make subsequent query run a bit faster. admitDxClinicalItemIds = self._getAdmitDxClinicalItemIds() # Build query for # of unique patients. # SELECT # ci.name AS icd_code, # ci.description AS admit_dx, # EXTRACT(YEAR FROM pi.item_date) AS admit_year, # COUNT(DISTINCT pi.patient_id) AS num_unique_patients, # FROM # patient_item AS pi # JOIN # clinical_item AS ci # ON # pi.clinical_item_id = ci.clinical_item_id # WHERE # ci.clinical_item_id in (admitDxClinicalItemIds) # GROUP BY # icd_code, # admit_dx, # admit_year # num_unique_patients # ORDER BY # admit_year, # num_unique_patients DESC query = SQLQuery() query.addSelect("ci.name AS icd_code") query.addSelect("ci.description AS admit_dx") query.addSelect("EXTRACT(YEAR FROM pi.item_date) AS admit_year") query.addSelect("COUNT(DISTINCT pi.patient_id) AS num_unique_patients") query.addFrom("patient_item AS pi") query.addJoin("clinical_item AS ci", "pi.clinical_item_id = ci.clinical_item_id") query.addWhereIn("ci.clinical_item_id", admitDxClinicalItemIds) query.addGroupBy("icd_code") query.addGroupBy("admit_dx") query.addGroupBy("admit_year") query.addGroupBy("num_unique_patients") query.addOrderBy("icd_code") query.addOrderBy("admit_year") query.addOrderBy("num_unique_patients DESC") # Execute query. results = DBUtil.execute(query)
def add_sim_case_column_to(csv): query = SQLQuery() query.addSelect("sim_patient_id") query.addSelect("sim_case_name as sim_case") query.addFrom("sim_grading_key sgk") query.addJoin( "sim_patient_order spo", "sgk.clinical_item_id = spo.clinical_item_id and spo.sim_state_id = sgk.sim_state_id" ) query.addGroupBy("sim_patient_id") query.addGroupBy("sim_case_name") case_names = DBUtil.execute(str(query)) # merge sim_case column csv = pd.merge(csv, pd.DataFrame(case_names, columns=['sim_patient_id', 'sim_case']), left_on='patient', right_on='sim_patient_id') return csv
def test_draw(self): # Query events by clinical_item_category. # SELECT # pi.patient_id AS sequence_id, # pi.item_date AS event_time, # cic.description AS event_id # FROM # patient_item AS pi # JOIN # clinical_item AS ci # ON # pi.clinical_item_id = ci.clinical_item_id # JOIN # clinical_item_category AS cic # ON # ci.clinical_item_category_id = cic.clinical_item_category_id # ORDER BY # sequence_id, # event_time, # event_id query = SQLQuery() query.addSelect('pi.patient_id AS sequence_id') query.addSelect('pi.item_date AS event_time') query.addSelect('cic.description AS event_id') query.addFrom('patient_item AS pi') query.addJoin('clinical_item AS ci', 'pi.clinical_item_id = ci.clinical_item_id') query.addJoin( 'clinical_item_category AS cic', 'ci.clinical_item_category_id = cic.clinical_item_category_id') query.addOrderBy('sequence_id') query.addOrderBy('event_time') query.addOrderBy('event_id') events = DBUtil.execute(query) # Build graph based on clinical_item_category. categoryDigraph = EventDigraph(events) categoryDigraphVizFileName = "test-category-digraph.png" categoryDigraph.draw(categoryDigraphVizFileName)
# Find all clinical_item_ids associated with each patient_id # SELECT * FROM clinical_item WHERE clinical_item_category_id = 161; DATA_QUERY = SQLQuery() # From patient_item or clinical_item DATA_QUERY.addSelect("patient_id") DATA_QUERY.addSelect("clinical_item_category_id") DATA_QUERY.addSelect('name') DATA_QUERY.addSelect("description") # Join DATA_QUERY.addFrom("patient_item") DATA_QUERY.addJoin( "clinical_item", "patient_item.clinical_item_id = clinical_item.clinical_item_id", joinType="INNER") DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Med Univ (Primary)") # Everyone #DATA_QUERY.addWhereEqual("clinical_item_category_id = 161 AND description", "Tt Pamf Med (Primary)") # Expert DATA_QUERY.addOrderBy("patient_id", dir="ASC") print(DATA_QUERY) # Write out data to CSV DBUtil.runDBScript(SCRIPT_FILE, False) results = DBUtil.execute(DATA_QUERY) unique_patient_ids = {}
def grade_cases(self, sim_patient_ids, sim_grader_id, conn=None): """Given the identifiers for a bunch of simulated physician-patient case records, and the identifier for a particular grading key to use, calculate what grade each case would get based on the choices made and return a dictionary of case grades (keyed by the case ID). """ ext_conn = True if conn is None: conn = self.connFactory.connection() ext_conn = False try: # Inner query retrieves physician-patient cases with ranking group_names (to later select first) # per case for specified cases. Each NULL group_name is treated as a separate group by assigning it # sim_patient_order_id. It also omits Default user (sim_user_id = 0) from grading. inner_query = SQLQuery() inner_query.addSelect("score") inner_query.addSelect( "rank() over (" # ranks rows incrementally in the same group " partition by coalesce(group_name, sim_patient_order_id::text), sim_patient_id" " order by sim_patient_order_id" ")") inner_query.addSelect("sim_user_id") inner_query.addSelect("sim_patient_id") inner_query.addSelect("sim_grader_id") inner_query.addFrom("sim_patient_order spo") inner_query.addJoin( "sim_grading_key sgk", "sgk.clinical_item_id = spo.clinical_item_id" " and sgk.sim_state_id = spo.sim_state_id") inner_query.addWhereEqual("sgk.sim_grader_id", sim_grader_id) inner_query.addWhereNotEqual( "spo.sim_user_id", 0) # 0 = ignore 'Default user', sets up initial cases inner_query.addWhereIn("spo.sim_patient_id", sim_patient_ids) inner_query.addOrderBy("relative_time_start") inner_query.addOrderBy("sim_patient_order_id") # Outer query sums the score per patient case and selects most graded physician for the case. # Theoretically, it isn't necessarily the most active physician for the case since his orders # might have been dropped by selecting only the first record within group_name group. query = SQLQuery() query.addSelect("sim_patient_id") query.addSelect("sim_grader_id") query.addSelect("sum(score) as total_score") query.addSelect( "mode() within group (" # mode() selects most frequent value within group " order by sim_user_id" ") as most_graded_user_id") query.addFrom("(" + str(inner_query) + ") as ranked_groups") query.addWhereEqual("ranked_groups.rank", 1) # count only first order in the same group query.addGroupBy("sim_patient_id") query.addGroupBy("sim_grader_id") query_params = inner_query.getParams() + query.getParams() grades_table = DBUtil.execute(query, query_params, includeColumnNames=True, conn=conn) grades_model = modelListFromTable(grades_table) # get most active users for the cases most_active_user_query = SQLQuery() most_active_user_query.addSelect("sim_patient_id") most_active_user_query.addSelect("mode() within group (" " order by sim_user_id" ") as most_active_user_id") most_active_user_query.addFrom("sim_patient_order") most_active_user_query.addWhereNotEqual("sim_user_id", 0) # ignore Default user most_active_user_query.addWhereIn("sim_patient_id", sim_patient_ids) most_active_user_query.addGroupBy("sim_patient_id") most_active_user_query.addOrderBy("sim_patient_id") most_active_user_table = DBUtil.execute(most_active_user_query, includeColumnNames=True, conn=conn) most_active_user_model = modelListFromTable(most_active_user_table) # make a dict by sim_patient_id out of results - will be used for combining most_active_user_dict = { most_active_user["sim_patient_id"]: most_active_user for most_active_user in most_active_user_model } # combine results complete_grades = [ grade.update(most_active_user_dict[grade["sim_patient_id"]]) for grade in grades_model ] return complete_grades finally: if not ext_conn: conn.close()
def test_init(self): # Query events by clinical_item_category. # SELECT # pi.patient_id AS sequence_id, # pi.item_date AS event_time, # cic.description AS event_id # FROM # patient_item AS pi # JOIN # clinical_item AS ci # ON # pi.clinical_item_id = ci.clinical_item_id # JOIN # clinical_item_category AS cic # ON # ci.clinical_item_category_id = cic.clinical_item_category_id # ORDER BY # sequence_id, # event_time, # event_id query = SQLQuery() query.addSelect('pi.patient_id AS sequence_id') query.addSelect('pi.item_date AS event_time') query.addSelect('cic.description AS event_id') query.addFrom('patient_item AS pi') query.addJoin('clinical_item AS ci', 'pi.clinical_item_id = ci.clinical_item_id') query.addJoin( 'clinical_item_category AS cic', 'ci.clinical_item_category_id = cic.clinical_item_category_id') query.addOrderBy('sequence_id') query.addOrderBy('event_time') query.addOrderBy('event_id') events = DBUtil.execute(query) # Build graph based on clinical_item_category. categoryDigraph = EventDigraph(events) # Sort for easier comparison against test data. actualCategoryNodes = sorted(categoryDigraph.nodes()) actualCategoryEdges = sorted(categoryDigraph.edges()) # Validate results. expectedCategoryNodes = ED_TEST_OUTPUT_TABLES['test_init'][ 'category_nodes'] self.assertEqualList(actualCategoryNodes, expectedCategoryNodes) expectedCategoryEdges = ED_TEST_OUTPUT_TABLES['test_init'][ 'category_edges'] self.assertEqualList(actualCategoryEdges, expectedCategoryEdges) # Query events by clinical_item. # SELECT # pi.patient_id AS sequence_id, # pi.item_date AS event_time, # ci.description AS event_id # FROM # patient_item AS pi # JOIN # clinical_item AS ci # ON # pi.clinical_item_id = ci.clinical_item_id # ORDER BY # sequence_id, # event_time, # event_id query = SQLQuery() query.addSelect('pi.patient_id AS sequence_id') query.addSelect('pi.item_date AS event_time') query.addSelect('ci.description AS event_id') query.addFrom('patient_item AS pi') query.addJoin('clinical_item AS ci', 'pi.clinical_item_id = ci.clinical_item_id') query.addJoin( 'clinical_item_category AS cic', 'ci.clinical_item_category_id = cic.clinical_item_category_id') query.addOrderBy('sequence_id') query.addOrderBy('event_time') query.addOrderBy('event_id') events = DBUtil.execute(query) # Build graph based on clinical_item. itemDigraph = EventDigraph(events) # Sort for easier comparison against test data. actualItemNodes = sorted(itemDigraph.nodes()) actualItemEdges = sorted(itemDigraph.edges()) # Validate results. expectedItemNodes = ED_TEST_OUTPUT_TABLES['test_init']['item_nodes'] self.assertEqualList(actualItemNodes, expectedItemNodes) expectedItemEdges = ED_TEST_OUTPUT_TABLES['test_init']['item_edges'] self.assertEqualList(actualItemEdges, expectedItemEdges)