def test_any_multiple_columns(main_csv, gp_csv):
    """Query with a single ``any_of`` criterion on field 6148, value "4".

    Builds the queries from the criteria, runs them against the fixture
    files without writing any output, and compares the returned ids with
    the expected set.
    """
    criteria = {
        "any_of": [("6148", "4")],
        "all_of": [],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {
        "1037918",
        "1033149",
        "1016017",
        "1033388",
        "1030520",
        "1003670",
    }
    assert set(found) == expected
def test_any_singleton_query(main_csv, gp_csv):
    """Query with a single ``any_of`` criterion on field 6070, value "1".

    The ids returned by ``query.query_databases`` (run with ``write=False``)
    must match the expected set exactly.
    """
    criteria = {
        "any_of": [("6070", "1")],
        "all_of": [],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {
        "1041796",
        "1037058",
        "1024938",
        "1016017",
        "1038882",
        "1030520",
        "1003670",
        "1027017",
    }
    assert set(found) == expected
def test_none_basic(main_csv, gp_csv):
    """Query with a single ``none_of`` criterion on field 6070, value "1".

    The generated queries and the returned ids are printed before the
    returned ids are compared with the expected set.
    """
    criteria = {
        "none_of": [("6070", "1")],
        "all_of": [],
        "any_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    # Diagnostic output: the generated queries.
    print(built_queries)
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    # Diagnostic output: the ids that came back.
    print(found)
    expected = {
        "1037918",
        "1033149",
        "1033388",
        "1031625",
        "1031595",
        "1008947",
    }
    assert set(found) == expected
def test_any_pair_same_field_query(main_csv, gp_csv):
    """Query combining ``any_of``, ``all_of`` and ``none_of`` criteria.

    Uses three ``any_of`` entries (fields 41270, 6119 and 6148), one
    ``all_of`` entry (field 6070) and one ``none_of`` entry (``read_2``),
    then checks the returned ids against the expected set.
    """
    criteria = {
        "any_of": [
            ("41270", "Block H40-H42"),
            ("6119", "3"),
            ("6148", "4"),
        ],
        "all_of": [("6070", "1")],
        "none_of": [("read_2", "XE0of")],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {"1041796", "1037058", "1030520", "1003670"}
    assert set(found) == expected
def test_all_gp_clinical_read2and3(main_csv, gp_csv):
    """Query with ``any_of`` criteria on both ``read_2`` and ``read_3``.

    NOTE(review): the test name says "all", but both criteria are listed
    under ``any_of`` -- confirm which is intended.
    """
    criteria = {
        "all_of": [],
        "any_of": [("read_2", "XE0of"), ("read_3", "XE0Gu")],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {"1016017", "1037918"}
    assert set(found) == expected
def test_missing_fields(main_csv, gp_csv):
    """Query with an ``all_of`` criterion on field 6070, value "nan".

    No ids are expected back: the result must be an empty set.
    """
    criteria = {
        "all_of": [("6070", "nan")],
        "any_of": [],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = set()
    assert set(found) == expected
def test_fields_with_spaces(main_csv, gp_csv):
    """Query with an ``all_of`` criterion whose value contains a space.

    The value under test is "Block H40-H42" on field 41270; the returned
    ids must match the expected set.
    """
    criteria = {
        "all_of": [("41270", "Block H40-H42")],
        "any_of": [],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {"1033149", "1041796"}
    assert set(found) == expected
def test_none_returns_empty(main_csv, gp_csv):
    """Query with one ``all_of`` criterion and two ``none_of`` criteria.

    ``all_of`` uses field 6148 (value "4"); ``none_of`` lists field 6070
    twice (values "2" and "1").

    NOTE(review): the test name suggests an empty result, but the expected
    set holds two ids -- confirm the intended expectation.
    """
    criteria = {
        "all_of": [("6148", "4")],
        "any_of": [],
        "none_of": [("6070", "2"), ("6070", "1")],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {"1033149", "1033388"}
    assert set(found) == expected
def test_all_gp_clinical_read2(main_csv, gp_csv):
    """Query with a single ``all_of`` criterion on ``read_2``, value "4662.".

    Exactly one id is expected in the result.
    """
    criteria = {
        "all_of": [("read_2", "4662.")],
        "any_of": [],
        "none_of": [],
    }
    built_queries = query.create_queries(
        criteria,
        main_filename=main_csv,
        gpc_path=gp_csv,
    )
    found = query.query_databases(
        cohort_criteria=criteria,
        queries=built_queries,
        main_filename=main_csv,
        write_dir=None,
        gpc_path=gp_csv,
        out_filename=None,
        write=False,
    )
    expected = {"1041796"}
    assert set(found) == expected