def get_ui_rest_api(args_dict, pqo, fn, enrichment_method, with_abundance=False, num_bins=NUM_BINS):
    df = pd.read_csv(fn, sep='\t')
    fg = format_for_REST_API(df.loc[df["foreground"].notnull(), "foreground"])
    bg = format_for_REST_API(df.loc[df["background"].notnull(), "background"])
    in_ = format_for_REST_API(df.loc[df["intensity"].notnull(), "intensity"])
    args_dict_temp = args_dict.copy()
    if with_abundance:
        args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                               "enrichment_method": enrichment_method, "num_bins": num_bins})
    else:
        args_dict_temp.update({"foreground": fg, "background": bg,
                               "enrichment_method": enrichment_method})
    return userinput.REST_API_input(pqo, args_dict=args_dict_temp)
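# format_for_REST_API is imported from the shared test helpers and not defined in
# this module. A minimal sketch of what it is assumed to do, based on the literal
# "%0d".join(...) calls in test_random_REST_API_Input_abundance_correction below
# ("%0d" is the URL-encoded carriage return the REST API expects as delimiter);
# the name format_for_REST_API_sketch is hypothetical, not part of the code base:
def format_for_REST_API_sketch(identifiers):
    # join accession numbers / ENSPs into a single "%0d"-delimited string
    return "%0d".join(str(identifier) for identifier in identifiers)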
def test_iter_bins_API_input(pqo_STRING, args_dict, foreground, background, enrichment_method):
    fg = format_for_REST_API(foreground[foreground.notnull()])
    bg = format_for_REST_API(background.loc[background.background.notnull(), "background"])
    in_ = format_for_REST_API(background.loc[background.intensity.notnull(), "intensity"])
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method})
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    counter = 0
    for ans, weight_fac in ui.iter_bins():
        # every weighting factor is a float or an int
        assert isinstance(weight_fac, (float, int))
        counter += 1
    # pd.cut with retbins=True returns the bin edges; 100 bins yield 101 edges
    number_of_bins_used = pd.cut(ui.foreground["intensity"], bins=100, retbins=True)[1].shape[0]
    assert counter == number_of_bins_used
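# Why the expected count above equals the number of bin edges: pd.cut(..., retbins=True)
# returns a (binned_values, bin_edges) tuple, and n bins have n + 1 edges. A minimal,
# self-contained illustration using plain pandas (no project code involved):
def _demo_pd_cut_bin_edges():
    import pandas as pd
    _, bin_edges = pd.cut(pd.Series(range(10)), bins=100, retbins=True)
    assert bin_edges.shape[0] == 101  # 100 bins -> 101 edges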
def test_cleanupforanalysis_characterize_foreground_REST_API(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """
    python/test_userinput.py::test_cleanupforanalysis_characterize_foreground_REST_API[edge case, empty DFs with NaNs] XPASS
    """
    foreground, background, _ = fixture_fg_bg_meth_expected_cases
    enrichment_method = "characterize_foreground"
    foreground_n = None
    background_n = None
    fg = format_for_REST_API(foreground)
    bg = None
    in_ = None
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method,
                           "foreground_n": foreground_n, "background_n": background_n})
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    # no NaNs where ANs are expected
    foreground = ui.foreground[ui.col_foreground]
    assert sum(foreground.isnull()) == 0
    assert sum(foreground.notnull()) > 0
    # foreground entries are strings
    assert isinstance(foreground.iloc[0], str)
    # no duplicates
    assert not foreground.duplicated().any()
def test_run_STRING_enrichment(pqo_STRING, STRING_examples, args_dict):
    """
    checking that run_STRING_enrichment returns a non-empty result for every
    entity type with data in the functions table
    :param pqo_STRING: PersistentQueryObject
    :param STRING_examples: tuple (foreground ENSPs, taxid)
    :param args_dict: dict (from conftest.py with default values)
    :return: None
    """
    enrichment_method = "compare_samples"
    foreground, taxid = STRING_examples
    background = query.get_proteins_of_taxid(taxid)
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": format_for_REST_API(foreground),
                           "background": format_for_REST_API(background),
                           "intensity": None,
                           "enrichment_method": enrichment_method})
    ui = userinput.REST_API_input(pqo_STRING, args_dict_temp)
    args_dict_temp.update({"limit_2_entity_type": variables.limit_2_entity_types_ALL,
                           "output_format": "json",
                           "FDR_cutoff": None})
    results_all_function_types = run.run_STRING_enrichment(pqo=pqo_STRING, ui=ui, args_dict=args_dict_temp)
    assert results_all_function_types != {'message': 'Internal Server Error'}
    etypes = variables.entity_types_with_data_in_functions_table
    assert len(set(results_all_function_types.keys()).intersection(etypes)) == len(etypes)
    for _, result in results_all_function_types.items():
        assert result  # result is not empty
def test_run_STRING_enrichment_genome(pqo_STRING, STRING_examples, args_dict):
    foreground, taxid = STRING_examples
    etype_2_association_dict = pqo_STRING.get_association_dict_split_by_category(foreground)
    background_n = pqo_STRING.get_proteome_count_from_taxid(taxid)
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": format_for_REST_API(foreground),
                           "enrichment_method": "genome",
                           "background_n": background_n})
    ui = userinput.REST_API_input(pqo_STRING, args_dict_temp)
    args_dict_temp.update({"taxid": taxid, "output_format": "json", "FDR_cutoff": None})
    results_all_function_types = run.run_STRING_enrichment_genome(pqo=pqo_STRING, ui=ui, background_n=background_n, args_dict=args_dict_temp)
    assert results_all_function_types != {'message': 'Internal Server Error'}
    # no assertion that every entity type is present: incomplete overlap with
    # variables.entity_types_with_data_in_functions_table can be due to missing
    # functional annotations for the given ENSPs
    for etype, result in results_all_function_types.items():
        result = ast.literal_eval(result)
        number_of_ENSPs_with_association = len(etype_2_association_dict[etype])
        number_of_associations = len({item for sublist in etype_2_association_dict[etype].values() for item in sublist})
        # number of rows in the result equals the number of distinct associations
        assert len(result) == number_of_associations
        # not every ENSP has functional associations
        assert len(foreground) >= number_of_ENSPs_with_association
def test_cleanupforanalysis_abundance_correction_REST_API(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """
    uses fixture_fg_bg_meth_all
    python/test_userinput.py::test_cleanupforanalysis_abundance_correction_REST_API[edge case, empty DFs with NaNs] XPASS
    XPASS: should fail but passes --> should not be tested at all, but doesn't matter
    """
    foreground, background, _ = fixture_fg_bg_meth_expected_cases
    enrichment_method = "abundance_correction"
    foreground_n = None
    background_n = None
    fg = format_for_REST_API(foreground)
    bg = format_for_REST_API(background["background"])
    in_ = format_for_REST_API(background["intensity"])
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method,
                           "foreground_n": foreground_n, "background_n": background_n})
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    assert ui.check_parse
    assert ui.check_cleanup
    # no NaNs where ANs are expected
    foreground = ui.foreground[ui.col_foreground]
    assert sum(foreground.isnull()) == 0
    assert sum(foreground.notnull()) > 0
    background = ui.background[ui.col_background]
    assert sum(background.isnull()) == 0
    assert sum(background.notnull()) > 0
    # every AN has an abundance value
    foreground_intensity = ui.foreground[ui.col_intensity]
    assert sum(foreground_intensity.isnull()) == 0
    assert sum(foreground_intensity.notnull()) > 0
    background_intensity = ui.background[ui.col_intensity]
    assert sum(background_intensity.isnull()) == 0
    assert sum(background_intensity.notnull()) > 0
    # foreground and background are strings, abundance values are floats
    assert isinstance(foreground.iloc[0], str)
    assert isinstance(background.iloc[0], str)
    assert isinstance(foreground_intensity.iloc[0], float)
    assert isinstance(background_intensity.iloc[0], float)
    # no duplicates
    assert not foreground.duplicated().any()
    assert not background.duplicated().any()
    # abundance values are sorted in non-decreasing order
    assert non_decreasing(foreground_intensity.tolist())
    assert non_decreasing(background_intensity.tolist())
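# non_decreasing is imported from the shared test helpers and not defined in this
# module; a minimal sketch of the behaviour the two assertions above rely on
# (an assumption about the helper; non_decreasing_sketch is a hypothetical name):
def non_decreasing_sketch(values):
    # True if and only if every element is >= its predecessor
    return all(x <= y for x, y in zip(values, values[1:]))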
def test_ui_API_check(pqo, args_dict, fixture_fg_bg_meth_all):
    foreground, background, enrichment_method = fixture_fg_bg_meth_all
    fg = format_for_REST_API(foreground[foreground.notnull()])
    bg = format_for_REST_API(background.loc[background.background.notnull(), "background"])
    in_ = format_for_REST_API(background.loc[background.intensity.notnull(), "intensity"])
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method})
    ui = userinput.REST_API_input(pqo, args_dict=args_dict_temp)
    assert ui.check_parse
    assert ui.check_cleanup
    assert ui.check
def test_random_REST_API_Input_abundance_correction(pqo_STRING, args_dict, random_abundance_correction_foreground_background):
    foreground, background, intensity, taxid = random_abundance_correction_foreground_background
    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = "abundance_correction"
    args_dict_temp["taxid"] = taxid
    args_dict_temp["FDR_cutoff"] = 1
    args_dict_temp["p_value_cutoff"] = 1
    # "%0d" is the URL-encoded carriage return the REST API expects as delimiter
    args_dict_temp["foreground"] = "%0d".join(foreground)
    args_dict_temp["background"] = "%0d".join(background)
    args_dict_temp["background_intensity"] = "%0d".join(intensity)
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    assert ui.check_parse
    assert ui.check_cleanup
    num_rows, num_cols = ui.df_orig.shape
    assert num_cols == 3
    assert num_rows >= 200
def test_EnrichmentStudy_genome(random_foreground_background, pqo_STRING, args_dict):
    """
    checking for:
      - non-empty results dictionary
      - perc_association_foreground <= 100
      - perc_association_background <= 100
      - foreground_count <= foreground_n
      - background_count <= background_n
    :return: None
    """
    go_slim_or_basic = "basic"
    o_or_u_or_both = "overrepresented"
    multitest_method = "benjamini_hochberg"
    output_format = "json"
    foreground, background, taxid = random_foreground_background
    background_n = pqo_STRING.get_proteome_count_from_taxid(int(taxid))
    assert background_n == len(background)
    assert len(foreground) <= len(background)
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": format_for_REST_API(foreground),
                           "background": format_for_REST_API(background),
                           "enrichment_method": "genome"})
    ui = userinput.REST_API_input(pqo_STRING, args_dict_temp)
    etype_2_association_dict_foreground = pqo_STRING.get_association_dict_split_by_category(foreground)
    etype_2_association_2_count_dict_background, etype_2_association_2_ANs_dict_background, _ = query.get_association_2_count_ANs_background_split_by_entity(taxid)
    for entity_type in variables.entity_types_with_data_in_functions_table:
        dag = run.pick_dag_from_entity_type_and_basic_or_slim(entity_type, go_slim_or_basic, pqo_STRING)
        assoc_dict = etype_2_association_dict_foreground[entity_type]
        if assoc_dict:  # skip entity types without any associations for the foreground
            enrichment_study = enrichment.EnrichmentStudy(ui, assoc_dict, dag,
                                                          o_or_u_or_both=o_or_u_or_both,
                                                          multitest_method=multitest_method,
                                                          entity_type=entity_type,
                                                          association_2_count_dict_background=etype_2_association_2_count_dict_background[entity_type],
                                                          background_n=background_n)
            result = enrichment_study.get_result(output_format)
            assert result  # not an empty dict
def test_cleanupforanalysis_compare_groups_REST_API(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    foreground, background, _ = fixture_fg_bg_meth_expected_cases
    enrichment_method = "compare_groups"
    foreground_n = None
    background_n = None
    fg = format_for_REST_API(foreground)
    bg = format_for_REST_API(background["background"])
    in_ = None
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method,
                           "foreground_n": foreground_n, "background_n": background_n})
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    # no NaNs where ANs are expected
    foreground = ui.foreground[ui.col_foreground]
    assert sum(foreground.isnull()) == 0
    assert sum(foreground.notnull()) > 0
    background = ui.background[ui.col_background]
    assert sum(background.isnull()) == 0
    assert sum(background.notnull()) > 0
    # foreground and background are strings
    assert isinstance(foreground.iloc[0], str)
    assert isinstance(background.iloc[0], str)
    # duplicates from the original input should still be present in the cleaned-up DF;
    # the counts need not be equal because of splice variants, hence >= rather than ==
    # (NaNs are removed from df_orig before counting)
    foreground_df_orig = ui.df_orig[ui.col_foreground]
    background_df_orig = ui.df_orig[ui.col_background]
    assert foreground.duplicated().sum() >= foreground_df_orig[foreground_df_orig.notnull()].duplicated().sum()
    assert background.duplicated().sum() >= background_df_orig[background_df_orig.notnull()].duplicated().sum()
def test_iter_bins_API_input_missing_bin(pqo_STRING, args_dict, foreground, background, enrichment_method):
    """
    this test only works if ANs fall within separate bins; e.g., the following
    is a negative example (all intensities identical, hence a single bin):

       background  intensity foreground
    0           A        1.0          A
    1           B        1.0          B
    2           C        1.0          C
    """
    fg = format_for_REST_API(foreground[foreground.notnull()])
    bg = format_for_REST_API(background.loc[background.background.notnull(), "background"])
    in_ = format_for_REST_API(background.loc[background.intensity.notnull(), "intensity"])
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({"foreground": fg, "background": bg, "intensity": in_,
                           "num_bins": NUM_BINS, "enrichment_method": enrichment_method})
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    counter = 0
    for ans, weight_fac in ui.iter_bins():
        # every weighting factor is a float or an int
        assert isinstance(weight_fac, (float, int))
        counter += 1
    # the test data uses integer-valued intensities, so the number of distinct bins
    # can be bounded from below via a set of the truncated values
    num_min_iterations_expected = len({int(ele) for ele in ui.foreground["intensity"].tolist()})
    assert counter >= num_min_iterations_expected
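# Illustration of the "missing bin" scenario from the docstring above: with
# identical intensities, pd.cut assigns every AN to the same bin, so all but one
# of the 100 requested bins stay empty (plain pandas, no project code involved):
def _demo_missing_bins():
    import pandas as pd
    intensity = pd.Series([1.0, 1.0, 1.0])
    binned = pd.cut(intensity, bins=100)
    assert binned.nunique() == 1  # all three values fall into a single bin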