def get_ui_rest_api(args_dict,
                    pqo,
                    fn,
                    enrichment_method,
                    with_abundance=False,
                    num_bins=NUM_BINS):
    """
    Build a ``userinput.REST_API_input`` from a tab-separated input file.

    The file is read with pandas. The non-null entries of its "foreground"
    and "background" columns (and of "intensity" when ``with_abundance`` is
    set) are passed through ``format_for_REST_API`` and stored in a copy of
    ``args_dict``, which is then handed to ``REST_API_input``.

    :param args_dict: dict of default arguments; copied, the original is left untouched
    :param pqo: query object handed through to ``REST_API_input``
    :param fn: tab-separated file to read
    :param enrichment_method: value stored under "enrichment_method"
    :param with_abundance: if True, "intensity" and "num_bins" are stored as well
    :param num_bins: value stored under "num_bins" (only when ``with_abundance``)
    :return: the ``userinput.REST_API_input`` instance
    """
    df = pd.read_csv(fn, sep='\t')

    def column_for_api(column):
        # rows without a value in this column are dropped before formatting
        return format_for_REST_API(df.loc[df[column].notnull(), column])

    # work on a copy so the caller's defaults are not changed
    args_dict_temp = args_dict.copy()
    args_dict_temp.update({
        "foreground": column_for_api("foreground"),
        "background": column_for_api("background"),
        "enrichment_method": enrichment_method
    })
    if with_abundance:
        # the intensity column is read only here, so files without it still
        # work for the methods that do not use abundance data
        args_dict_temp.update({
            "intensity": column_for_api("intensity"),
            "num_bins": num_bins
        })
    return userinput.REST_API_input(pqo, args_dict=args_dict_temp)
Пример #2
0
def get_ui_rest_api(pqo, fn, enrichment_method, with_abundance=False, num_bins=NUM_BINS):
    """
    Build a ``userinput.REST_API_input`` from a tab-separated input file.

    The non-null entries of the "foreground" and "background" columns (and of
    "intensity" when ``with_abundance`` is set) are passed through
    ``format_for_REST_API`` and handed to ``REST_API_input`` as keyword
    arguments.

    :param pqo: query object handed through to ``REST_API_input``
    :param fn: tab-separated file to read
    :param enrichment_method: forwarded as ``enrichment_method``
    :param with_abundance: if True, the intensities are forwarded as ``background_intensity``
    :param num_bins: forwarded as ``num_bins``, but only when ``with_abundance`` is False
    :return: the ``userinput.REST_API_input`` instance
    """
    df = pd.read_csv(fn, sep='\t')

    def column_for_api(column):
        # rows without a value in this column are dropped before formatting
        return format_for_REST_API(df.loc[df[column].notnull(), column])

    fg = column_for_api("foreground")
    bg = column_for_api("background")
    if not with_abundance:
        return userinput.REST_API_input(pqo=pqo, foreground_string=fg, background_string=bg, enrichment_method=enrichment_method, num_bins=num_bins)
    # the intensity column is read only when it is actually used, so files
    # without it still work for the non-abundance methods
    in_ = column_for_api("intensity")
    return userinput.REST_API_input(pqo=pqo, foreground_string=fg, background_string=bg, background_intensity=in_, enrichment_method=enrichment_method)
def test_iter_bins_API_input(pqo_STRING, args_dict, foreground, background,
                             enrichment_method):
    """
    Iterate over the bins of a REST API input that carries intensities.

    Every weighting factor yielded by ``iter_bins`` has to be a float or an
    int, and the number of iterations has to equal the number of bin edges
    pandas returns when cutting the foreground intensities into 100 bins.
    """
    background_given = background.background.notnull()
    intensity_given = background.intensity.notnull()
    overrides = {
        "foreground": format_for_REST_API(foreground[foreground.notnull()]),
        "background": format_for_REST_API(background.loc[background_given, "background"]),
        "intensity": format_for_REST_API(background.loc[intensity_given, "intensity"]),
        "num_bins": NUM_BINS,
        "enrichment_method": enrichment_method,
    }
    request_args = args_dict.copy()
    request_args.update(overrides)
    ui = userinput.REST_API_input(pqo_STRING, args_dict=request_args)

    iterations = 0
    for _, weighting_factor in ui.iter_bins():
        assert isinstance(weighting_factor, (float, int))
        iterations += 1

    # with retbins=True the second return value holds the bin edges
    _, bin_edges = pd.cut(ui.foreground["intensity"], bins=100, retbins=True)
    assert iterations == bin_edges.shape[0]
def test_cleanupforanalysis_characterize_foreground_REST_API(
        pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """
    Clean-up of a foreground-only input ("characterize_foreground").

    Only the foreground of the fixture is submitted; background, intensity
    and both counts are explicitly set to None.

    Recorded pytest outcome:
    python/test_userinput.py::test_cleanupforanalysis_characterize_foreground_REST_API[edge case, empty DFs with NaNs] XPASS
    """
    foreground_input, _, _ = fixture_fg_bg_meth_expected_cases
    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground_input),
        "background": None,
        "intensity": None,
        "num_bins": NUM_BINS,
        "enrichment_method": "characterize_foreground",
        "foreground_n": None,
        "background_n": None,
    })
    ui = userinput.REST_API_input(pqo_STRING, args_dict=request_args)

    cleaned_foreground = ui.foreground[ui.col_foreground]

    # accession numbers are present and none of them is NaN
    assert cleaned_foreground.isnull().sum() == 0
    assert cleaned_foreground.notnull().sum() > 0

    # accession numbers are strings
    assert isinstance(cleaned_foreground.iloc[0], str)

    # every accession number occurs only once
    assert not cleaned_foreground.duplicated().any()
Пример #5
0
def test_run_STRING_enrichment(pqo_STRING, STRING_examples, args_dict):
    """
    Run a "compare_samples" enrichment of the example foreground against all
    proteins of its taxid and check that every expected entity type is
    present with a non-empty result.

    :param pqo_STRING: PersistentQuery Object
    :param STRING_examples: tuple (foreground ENSPs, taxid)
    :param args_dict: dict (from conftest.py with default values)
    :return: None
    """
    foreground, taxid = STRING_examples
    background = query.get_proteins_of_taxid(taxid)

    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground),
        "background": format_for_REST_API(background),
        "intensity": None,
        "enrichment_method": "compare_samples",
    })
    ui = userinput.REST_API_input(pqo_STRING, request_args)

    # the very same dict, already handed to the user input above, is extended
    # with the options for the enrichment run
    request_args.update({
        "limit_2_entity_type": variables.limit_2_entity_types_ALL,
        "output_format": "json",
        "FDR_cutoff": None,
    })
    results = run.run_STRING_enrichment(pqo=pqo_STRING, ui=ui, args_dict=request_args)
    assert results != {'message': 'Internal Server Error'}

    expected_etypes = variables.entity_types_with_data_in_functions_table
    covered_etypes = set(results.keys()).intersection(expected_etypes)
    assert len(covered_etypes) == len(expected_etypes)

    # no entity type may come back empty
    for result in results.values():
        assert result
Пример #6
0
def test_run_STRING_enrichment_genome(pqo_STRING, STRING_examples, args_dict):
    """
    Run a "genome" enrichment for the example foreground and compare the
    number of result rows per entity type with the number of distinct
    associations found for the foreground.

    :param pqo_STRING: PersistentQuery Object
    :param STRING_examples: tuple (foreground ENSPs, taxid)
    :param args_dict: dict with default values
    :return: None
    """
    foreground, taxid = STRING_examples
    associations_per_etype = pqo_STRING.get_association_dict_split_by_category(foreground)
    background_n = pqo_STRING.get_proteome_count_from_taxid(taxid)

    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground),
        "enrichment_method": "genome",
        "background_n": background_n,
    })
    ui = userinput.REST_API_input(pqo_STRING, request_args)

    # the same dict object is extended with the options for the run
    request_args.update({
        "taxid": taxid,
        "output_format": "json",
        "FDR_cutoff": None,
    })
    results = run.run_STRING_enrichment_genome(pqo=pqo_STRING, ui=ui, background_n=background_n, args_dict=request_args)
    assert results != {'message': 'Internal Server Error'}

    # the entity types are not checked for completeness: an incomplete overlap
    # can be due to missing functional annotations for the given ENSPs
    for etype, serialized_rows in results.items():
        rows = ast.literal_eval(serialized_rows)
        ensp_2_associations = associations_per_etype[etype]
        distinct_associations = set().union(*ensp_2_associations.values())
        # one result row per distinct association
        assert len(rows) == len(distinct_associations)
        # not every ENSP has functional associations
        assert len(foreground) >= len(ensp_2_associations)
def test_cleanupforanalysis_abundance_correction_REST_API(
        pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """
    Clean-up of an "abundance_correction" input submitted via the REST API.

    Recorded pytest outcome:
    python/test_userinput.py::test_cleanupforanalysis_abundance_correction_REST_API[edge case, empty DFs with NaNs] XPASS
    XPASS: should fail but passes.
    --> should not be tested at all, but doesn't matter
    """
    foreground_input, background_input, _ = fixture_fg_bg_meth_expected_cases
    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground_input),
        "background": format_for_REST_API(background_input["background"]),
        "intensity": format_for_REST_API(background_input["intensity"]),
        "num_bins": NUM_BINS,
        "enrichment_method": "abundance_correction",
        "foreground_n": None,
        "background_n": None,
    })
    ui = userinput.REST_API_input(pqo_STRING, args_dict=request_args)
    assert ui.check_parse == True
    assert ui.check_cleanup == True

    def assert_populated(column):
        # at least one value, and not a single NaN
        assert column.isnull().sum() == 0
        assert column.notnull().sum() > 0

    # accession numbers
    fg_ans = ui.foreground[ui.col_foreground]
    assert_populated(fg_ans)
    bg_ans = ui.background[ui.col_background]
    assert_populated(bg_ans)

    # every accession number has an abundance value
    fg_intensity = ui.foreground[ui.col_intensity]
    assert_populated(fg_intensity)
    bg_intensity = ui.background[ui.col_intensity]
    assert_populated(bg_intensity)

    # accession numbers are strings, abundance values are floats
    assert isinstance(fg_ans.iloc[0], str)
    assert isinstance(bg_ans.iloc[0], str)
    assert isinstance(fg_intensity.iloc[0], float)
    assert isinstance(bg_intensity.iloc[0], float)

    # every accession number occurs only once
    assert not fg_ans.duplicated().any()
    assert not bg_ans.duplicated().any()

    # abundance values are sorted
    assert non_decreasing(fg_intensity.tolist()) == True
    assert non_decreasing(bg_intensity.tolist()) == True
Пример #8
0
def test_ui_API_check(pqo, fixture_fg_bg_meth_all):
    """
    Build a REST API input from the non-null fixture values and check that
    parsing, clean-up and the overall check are all flagged as True.
    """
    foreground, background, enrichment_method = fixture_fg_bg_meth_all
    background_given = background.background.notnull()
    intensity_given = background.intensity.notnull()
    api_kwargs = {
        "foreground_string": format_for_REST_API(foreground[foreground.notnull()]),
        "background_string": format_for_REST_API(background.loc[background_given, "background"]),
        "background_intensity": format_for_REST_API(background.loc[intensity_given, "intensity"]),
        "num_bins": NUM_BINS,
        "enrichment_method": enrichment_method,
    }

    ui = userinput.REST_API_input(pqo=pqo, **api_kwargs)
    assert ui.check_parse == True
    assert ui.check_cleanup == True
    assert ui.check == True
def test_random_REST_API_Input_abundance_correction(
        pqo_STRING, args_dict,
        random_abundance_correction_foreground_background):
    """
    Parse and clean up a randomly generated "abundance_correction" input.

    The fixture values are joined with "%0d" and submitted together with the
    taxid and cutoffs of 1. The original input has to end up as a DataFrame
    with exactly 3 columns and at least 200 rows.

    :param pqo_STRING: query object handed through to ``REST_API_input``
    :param args_dict: dict with default values; it is copied, not modified
    :param random_abundance_correction_foreground_background: tuple
        (foreground, background, intensity, taxid)
    :return: None
    """
    foreground, background, intensity, taxid = random_abundance_correction_foreground_background
    # copy first: writing into the ``args_dict`` fixture object itself would
    # leak these values to any other user of the same object
    args_dict_temp = args_dict.copy()
    # NOTE(review): the other tests in this file store intensities under
    # "intensity"; this one uses "background_intensity" -- confirm which key
    # REST_API_input reads
    args_dict_temp.update({
        "enrichment_method": "abundance_correction",
        "taxid": taxid,
        "FDR_cutoff": 1,
        "p_value_cutoff": 1,
        "foreground": "%0d".join(foreground),
        "background": "%0d".join(background),
        "background_intensity": "%0d".join(intensity),
    })
    ui = userinput.REST_API_input(pqo_STRING, args_dict=args_dict_temp)
    assert ui.check_parse == True
    assert ui.check_cleanup == True
    num_rows, num_cols = ui.df_orig.shape
    assert num_cols == 3
    assert num_rows >= 200
Пример #10
0
def test_EnrichmentStudy_genome(random_foreground_background, pqo_STRING, args_dict):
    """
    Run an ``EnrichmentStudy`` with the "genome" method for every entity type
    that has foreground associations and check for a non-empty result.

    Also checked up front: the proteome count of the taxid equals the size of
    the background, and the foreground is not larger than the background.

    :return: None
    """
    foreground, background, taxid = random_foreground_background
    background_n = pqo_STRING.get_proteome_count_from_taxid(int(taxid))
    assert background_n == len(background)
    assert len(foreground) <= len(background)

    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground),
        "background": format_for_REST_API(background),
        "enrichment_method": "genome",
    })
    ui = userinput.REST_API_input(pqo_STRING, request_args)

    foreground_associations = pqo_STRING.get_association_dict_split_by_category(foreground)
    # only the per-entity-type association counts of the background are needed
    background_counts, _, _ = query.get_association_2_count_ANs_background_split_by_entity(taxid)

    for entity_type in variables.entity_types_with_data_in_functions_table:
        dag = run.pick_dag_from_entity_type_and_basic_or_slim(entity_type, "basic", pqo_STRING)
        assoc_dict = foreground_associations[entity_type]
        if not assoc_dict:
            # nothing to test for entity types without foreground associations
            continue
        enrichment_study = enrichment.EnrichmentStudy(
            ui, assoc_dict, dag,
            o_or_u_or_both="overrepresented",
            multitest_method="benjamini_hochberg",
            entity_type=entity_type,
            association_2_count_dict_background=background_counts[entity_type],
            background_n=background_n)
        # the result must not be empty
        assert enrichment_study.get_result("json")
Пример #11
0
def test_cleanupforanalysis_compare_groups_REST_API(
        pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """
    Clean-up of a "compare_groups" input submitted via the REST API.

    Foreground and background are submitted without intensities. After the
    clean-up both columns have to be free of NaNs, contain strings, and hold
    at least as many duplicates as the non-null original input.
    """
    foreground_input, background_input, _ = fixture_fg_bg_meth_expected_cases
    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground_input),
        "background": format_for_REST_API(background_input["background"]),
        "intensity": None,
        "num_bins": NUM_BINS,
        "enrichment_method": "compare_groups",
        "foreground_n": None,
        "background_n": None,
    })
    ui = userinput.REST_API_input(pqo_STRING, args_dict=request_args)

    def assert_populated(column):
        # at least one value, and not a single NaN
        assert column.isnull().sum() == 0
        assert column.notnull().sum() > 0

    def count_duplicates_ignoring_nan(column):
        return column[column.notnull()].duplicated().sum()

    fg_ans = ui.foreground[ui.col_foreground]
    assert_populated(fg_ans)
    bg_ans = ui.background[ui.col_background]
    assert_populated(bg_ans)

    # accession numbers are strings
    assert isinstance(fg_ans.iloc[0], str)
    assert isinstance(bg_ans.iloc[0], str)

    # duplicates of the original input are preserved by the clean-up; the
    # counts are not necessarily equal because of splice variants
    fg_original = ui.df_orig[ui.col_foreground]
    bg_original = ui.df_orig[ui.col_background]
    assert fg_ans.duplicated().sum() >= count_duplicates_ignoring_nan(fg_original)
    assert bg_ans.duplicated().sum() >= count_duplicates_ignoring_nan(bg_original)
Пример #12
0
def test_iter_bins_API_input_missing_bin(pqo_STRING, args_dict, foreground,
                                         background, enrichment_method):
    """
    Iterate over the bins and check that there are at least as many
    iterations as there are distinct (integer) foreground intensities.

    This only works if the ANs fall within separate bins; a negative example
    would be:
       background  intensity foreground
    0           A        1.0          A
    1           B        1.0          B
    2           C        1.0          C
    """
    background_given = background.background.notnull()
    intensity_given = background.intensity.notnull()
    overrides = {
        "foreground": format_for_REST_API(foreground[foreground.notnull()]),
        "background": format_for_REST_API(background.loc[background_given, "background"]),
        "intensity": format_for_REST_API(background.loc[intensity_given, "intensity"]),
        "num_bins": NUM_BINS,
        "enrichment_method": enrichment_method,
    }
    request_args = args_dict.copy()
    request_args.update(overrides)
    ui = userinput.REST_API_input(pqo_STRING, args_dict=request_args)

    iterations = 0
    for _, weighting_factor in ui.iter_bins():
        # every weighting factor is a float or an int
        assert isinstance(weighting_factor, (float, int))
        iterations += 1

    # the test data uses whole numbers, so the distinct integer values give
    # the minimum number of bins that have to show up
    distinct_intensities = set(map(int, ui.foreground["intensity"].tolist()))
    assert iterations >= len(distinct_intensities)