def test_db_any_singleton_query(sqlite_db):
    """A single any_of criterion ('6070' == "1") selects the expected eids."""
    criteria = {'any_of': [('6070', "1")], 'all_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)
    expected_ids = {
        1041796, 1037058, 1024938, 1016017,
        1038882, 1030520, 1003670, 1027017,
    }
    assert expected_ids == set(result['eid'].tolist())

    # The identical query is issued a second time on the same connection and
    # its result is not inspected. Presumably this guards against a repeated
    # run raising -- TODO confirm the intent.
    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)


# Check that any of works over fields with multiple columns
def test_db_any_pair_same_field_query(sqlite_db):
    """Combine any_of, all_of and none_of criteria in a single query."""
    any_terms = [('41270', "Block H40-H42"), ('6119', "3"), ('6148', '4')]
    all_terms = [('6070', "1")]
    none_terms = [('read_2', "XE0of")]
    criteria = {
        'any_of': any_terms,
        'all_of': all_terms,
        'none_of': none_terms,
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1041796, 1037058, 1030520, 1003670}
    assert expected_ids == set(result['eid'].tolist())
def test_db_all_gp_clinical_read2and3(sqlite_db):
    """any_of across a 'read_2' code and a 'read_3' code returns both eids."""
    criteria = {
        'all_of': [],
        'any_of': [('read_2', "XE0of"), ('read_3', 'XE0Gu')],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1016017, 1037918}
    assert expected_ids == set(result['eid'].tolist())
def test_db_all_gp_clinical_read_2_multiple(sqlite_db):
    """all_of on the 'read_2' code "XE0of" matches exactly one eid."""
    criteria = {
        'all_of': [('read_2', "XE0of")],
        'any_of': [],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1016017}
    assert expected_ids == set(result['eid'].tolist())
def test_db_all_gp_clinical_read2(sqlite_db):
    """all_of on the 'read_2' code "4662." matches exactly one eid."""
    criteria = {
        'all_of': [('read_2', "4662.")],
        'any_of': [],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1041796}
    assert expected_ids == set(result['eid'].tolist())
def test_db_missing_fields2(sqlite_db):
    """Querying the field 'DoesNotExist' must raise a matching ValueError."""
    criteria = {
        'all_of': [('DoesNotExist', "X")],
        'any_of': [],
        'none_of': [],
    }

    # The error message is expected to start with the offending field name.
    with pytest.raises(ValueError, match=r"DoesNotExist .*"):
        db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)
def test_db_fields_with_spaces(sqlite_db):
    """A criterion value containing a space ("Block H40-H42") is matched."""
    criteria = {
        'all_of': [('41270', "Block H40-H42")],
        'any_of': [],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1033149, 1041796}
    assert expected_ids == set(result['eid'].tolist())
def test_db_none_returns_empty(sqlite_db):
    """all_of on '6148' combined with two none_of exclusions on '6070'.

    Note that, despite the test name, two eids are expected to remain.
    """
    exclusions = [("6070", "2"), ("6070", "1")]
    criteria = {
        'all_of': [('6148', "4")],
        'any_of': [],
        'none_of': exclusions,
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1033149, 1033388}
    assert expected_ids == set(result['eid'].tolist())
def test_db_all__none_multiple_columns(sqlite_db):
    """all_of on '6148' == "4" while excluding '6070' == "2" via none_of."""
    criteria = {
        'all_of': [('6148', "4")],
        'any_of': [],
        'none_of': [("6070", "2")],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1033149, 1016017, 1033388, 1030520, 1003670}
    assert expected_ids == set(result['eid'].tolist())
def test_db_any_pair_diff_field_query(sqlite_db):
    """any_of over two different fields ('6070' and '6119')."""
    criteria = {
        'any_of': [('6070', "1"), ('6119', "1")],
        'all_of': [],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {
        1041796, 1037058, 1024938, 1016017,
        1038882, 1030520, 1003670, 1027017,
    }
    assert expected_ids == set(result['eid'].tolist())
def test_db_missing_fields(sqlite_db):
    """all_of on field '6070' with the literal value "nan".

    NOTE(review): "nan" presumably stands for a missing entry in the
    fixture data -- confirm against the fixture.
    """
    criteria = {
        'all_of': [('6070', "nan")],
        'any_of': [],
        'none_of': [],
    }

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {
        1003670, 1016017, 1024938, 1027017, 1030520,
        1037058, 1037918, 1038882, 1041796,
    }
    assert expected_ids == set(result['eid'].tolist())
def test_db_all_query_basic(sqlite_db):
    """A single all_of criterion ('6070' == "1") selects the expected eids."""
    criteria = {'all_of': [('6070', "1")], 'any_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)
    observed_list = result['eid'].tolist()
    observed_ids = set(observed_list)

    expected_ids = {
        1041796, 1037058, 1024938, 1016017,
        1038882, 1030520, 1003670, 1027017,
    }

    # Diagnostic output; pytest only shows it when the test fails (or with -s).
    print("df['eid']:{}".format(observed_list))
    print("set(obs): {}".format(observed_ids))
    print(expected_ids)

    assert expected_ids == observed_ids
def submit_cohort_query(n: int, defined_terms: dict, all_terms: list,
                        any_terms: list, none_terms: list, config: dict,
                        kw_search_terms: list):
    """Run cohort search.

    Besides running the query, this writes the keyword search terms (when
    given) to ``search_terms.txt`` and the encoded/decoded cohort
    dictionaries to ``cohort_dictionary_<kind>.txt`` inside
    ``config['cohort_path']``.

    Keyword arguments:
    ------------------
    n: int
        indicates number of clicks of cohort search button
    defined_terms: dict
        phenotypes
    all_terms: list
        phenotypes for all participants
    any_terms: list
        phenotypes for any participants
    none_terms: list
        phenotypes for none of the participants
    config: dict
        path configuration; the keys 'cohort_path', 'db_path',
        'showcase_path' and 'codings_path' are read
    kw_search_terms: list
        search terms

    Returns:
    --------
    ids: list
        contains IDs returned from search (empty when nothing matched)
    timestamp: float
        POSIX timestamp taken before the query was run
    stats_report_dict:
        report built by ``stats.create_report`` for the returned IDs

    Raises:
    -------
    PreventUpdate
        if the callback was not triggered or ``n`` is None

    """
    # Nothing to do when the callback fires without a real button click.
    if not dash.callback_context.triggered or n is None:
        raise PreventUpdate

    timestamp = datetime.now().timestamp()

    logic_dictionary = {
        'all_of': all_terms,
        'any_of': any_terms,
        'none_of': none_terms
    }
    cohort_dictionaries = {
        "encoded": {"all_of": [], "any_of": [], "none_of": []},
        "decoded": {"all_of": [], "any_of": [], "none_of": []},
    }

    # Translate every non-empty selection into its encoded/decoded form.
    for logic, selected_terms in logic_dictionary.items():
        if selected_terms:
            terms_encoded, terms_decoded = _create_conditional_logic_list(
                selected_terms, defined_terms)
            cohort_dictionaries["encoded"][logic] = terms_encoded
            cohort_dictionaries["decoded"][logic] = terms_decoded

    outpath = config['cohort_path']

    if kw_search_terms:
        term_outfile = os.path.join(outpath, 'search_terms.txt')
        utils.write_txt_file(term_outfile, kw_search_terms)
        if os.path.exists(term_outfile):
            print(f"successfully saved search terms to file {term_outfile}")
        else:
            print(f"could not save search terms to file {term_outfile}")

    for k, v in cohort_dictionaries.items():
        name = "cohort_dictionary_" + k + ".txt"
        cohort_out = os.path.join(outpath, name)
        utils.write_dictionary(v, cohort_out)
        if os.path.exists(cohort_out):
            print(f"successfully saved {k} cohort dictionary to {cohort_out}")
        else:
            print(f"could not save {k} cohort dictionary to {cohort_out}")

    print('\ncreate_queries query_sqlite_db {}'.format(print_time()))
    db_filename = config['db_path']
    showcase_filename = config['showcase_path']
    coding_filename = config['codings_path']

    res = db.query_sqlite_db(db_filename=db_filename,
                             cohort_criteria=cohort_dictionaries['encoded'])

    # Always derive the id list from the result so an empty match yields an
    # empty list instead of depending on a branch having run.
    ids = res['eid'].tolist()
    print(f"length of ids {len(ids)}")

    # Only the translation frame is needed to build the report.
    _, translation_df = stats.compute_stats_db(
        db_filename, ids, showcase_filename, coding_filename)
    stats_report_dict = stats.create_report(translation_df)

    return ids, timestamp, stats_report_dict
def test_db_empty_query(sqlite_db):
    """A query with no criteria at all is expected to return no eids."""
    criteria = {'any_of': [], 'all_of': [], 'none_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = set()
    assert expected_ids == set(result['eid'].tolist())
def test_db_none_basic(sqlite_db):
    """A single none_of criterion ('6070' == "1") returns the remaining eids."""
    criteria = {'none_of': [('6070', "1")], 'all_of': [], 'any_of': []}

    result = db.query_sqlite_db(con=sqlite_db, cohort_criteria=criteria)

    expected_ids = {1037918, 1033149, 1033388, 1031625, 1031595, 1008947}
    assert expected_ids == set(result['eid'].tolist())