示例#1
0
def random_foreground_background():
    """
    Pick a random TaxID and draw a random foreground from its proteome.

    Used the TaxIDs fixture previously, but now it is random on TaxID level
    as well. Up to 10 randomly chosen TaxIDs are tried; a TaxID whose
    proteome holds fewer than 200 proteins is skipped, because a foreground
    of that size cannot be sampled from it.

    :return: tuple (foreground, background, taxid) where background is what
        query.get_proteins_of_taxid(taxid) returned and foreground is a list
        of 200 of its entries sampled without replacement
    :raises ValueError: if none of the attempts found a large enough proteome
    """
    foreground_size = 200
    max_attempts = 10
    taxids = query.get_taxids()  # read_from_flat_files=True
    for _ in range(max_attempts):
        taxid = random.choice(taxids)
        background = query.get_proteins_of_taxid(taxid)
        if len(background) < foreground_size:
            # random.sample() would raise ValueError here; try another TaxID
            continue
        foreground = random.sample(background, foreground_size)
        return foreground, background, taxid
    raise ValueError(
        "no TaxID with at least {} proteins found in {} attempts".format(
            foreground_size, max_attempts))
示例#2
0
def test_ENSP_consistency_of_DB():
    """
    - ENSPs of taxid_2_protein_table are the superset of ENSPs of protein_2_function_table

    For every TaxID, each ENSP that has a functional association (an entry in
    protein_2_function) also has to be part of the proteome returned by
    query.get_proteins_of_taxid, i.e. of the precomputed background.

    TaxID_2_Protein_table_STRING: ENSPs expected to be the superset of Protein_2_Function_table_STRING
    Protein_2_Function_table_STRING
    Function_2_ENSP_table_STRING (not checked here)
    """
    for taxid in query.get_taxids():
        ensp_taxid_2_protein = set(query.get_proteins_of_taxid(taxid))
        # Raw string so that the regex escape "\." (a literal dot after the
        # TaxID prefix) is not treated as an invalid Python escape sequence.
        statement = r"SELECT protein_2_function.an FROM protein_2_function WHERE protein_2_function.an ~ '^{}\.'".format(taxid)
        ensp_protein_2_function = {
            row[0] for row in query.get_results_of_statement(statement)
        }
        # A single subset check covers the size, intersection and union
        # comparisons: all of them hold exactly when nothing is missing.
        missing = ensp_protein_2_function - ensp_taxid_2_protein
        assert not missing, (
            "TaxID {}: {} ENSPs of protein_2_function are missing from "
            "taxid_2_protein, e.g. {}".format(
                taxid, len(missing), sorted(missing)[:5]))
示例#3
0
def test_run_STRING_enrichment(pqo_STRING, STRING_examples, args_dict):
    """
    Run a "compare_samples" enrichment and check that the result contains a
    non-empty entry for every entity type with data in the functions table.

    :param pqo_STRING: PersistentQuery Object
    :param STRING_examples: tuple (foreground ENSPs, taxid)
    :param args_dict: dict (from conftest.py with default values)
    :return: None
    """
    foreground, taxid = STRING_examples
    # the whole proteome of the TaxID serves as background
    background = query.get_proteins_of_taxid(taxid)

    # shallow copy, so the keys set below do not end up in args_dict itself
    args_dict_temp = args_dict.copy()
    args_dict_temp["foreground"] = format_for_REST_API(foreground)
    args_dict_temp["background"] = format_for_REST_API(background)
    args_dict_temp["intensity"] = None
    args_dict_temp["enrichment_method"] = "compare_samples"
    # user input is built from args_dict_temp before the output options are added
    ui = userinput.REST_API_input(pqo_STRING, args_dict_temp)

    args_dict_temp["limit_2_entity_type"] = variables.limit_2_entity_types_ALL
    args_dict_temp["output_format"] = "json"
    args_dict_temp["FDR_cutoff"] = None
    results_all_function_types = run.run_STRING_enrichment(
        pqo=pqo_STRING, ui=ui, args_dict=args_dict_temp)

    assert results_all_function_types != {'message': 'Internal Server Error'}
    etypes = variables.entity_types_with_data_in_functions_table
    result_keys = set(results_all_function_types.keys())
    assert len(result_keys.intersection(etypes)) == len(etypes)
    for result in results_all_function_types.values():
        # each per-entity-type result must not be empty
        assert result
示例#4
0
def random_abundance_correction_foreground_background():
    """
    Pick a random TaxID and build random abundance-correction input for it.

    Up to 10 randomly chosen TaxIDs are tried; a TaxID whose proteome holds
    fewer than 200 proteins is skipped, because a foreground of that size
    cannot be sampled from it.

    :return: tuple (foreground, background, intensity, taxid) where
        background is what query.get_proteins_of_taxid(taxid) returned,
        foreground is a list of 200 of its entries sampled without
        replacement, and intensity is a list with one string per background
        entry, each the str() of a value drawn via np.random.normal
    :raises ValueError: if none of the attempts found a large enough proteome
    """
    foreground_size = 200
    max_attempts = 10
    taxids = query.get_taxids()  # read_from_flat_files=True
    for _ in range(max_attempts):
        taxid = random.choice(taxids)
        background = query.get_proteins_of_taxid(taxid)
        if len(background) < foreground_size:
            # random.sample() would raise ValueError here; try another TaxID
            continue
        foreground = random.sample(background, foreground_size)
        intensity = [
            str(ele) for ele in np.random.normal(size=len(background))
        ]
        return foreground, background, intensity, taxid
    raise ValueError(
        "no TaxID with at least {} proteins found in {} attempts".format(
            foreground_size, max_attempts))
def create_functions_2_ENSP_table():
    """
    Print one tab-separated row per (TaxID, entity type, association) to stdout.

    TaxIDs are read one per line from fileinput.input(). For every TaxID the
    associations of its sorted proteins are counted per entity type, and each
    row holds: TaxID, entity type, association, count of the association,
    ans_counter, and the sorted ANs of the association as a brace-enclosed,
    comma-separated list of double-quoted entries.
    """
    for line in fileinput.input():
        taxid = line.strip()
        ans_list = sorted(query.get_proteins_of_taxid(taxid))
        # NOTE(review): the method is called on the class, not on an instance
        # -- confirm it is a static/class method.
        etype_2_association_dict = query.PersistentQueryObject_STRING.get_association_dict_split_by_category(ans_list)
        for etype in sorted(variables.entity_types_with_data_in_functions_table):
            association_2_count_dict, association_2_ANs_dict, ans_counter = count_terms_v3(
                set(ans_list), etype_2_association_dict[etype])
            for association, ans in association_2_ANs_dict.items():
                count = association_2_count_dict[association]
                # sanity check: one association cannot be counted more often than ans_counter
                assert ans_counter >= count
                row = [
                    str(taxid),
                    str(etype),
                    association,
                    str(count),
                    str(ans_counter),
                    # list repr without brackets and spaces, single quotes turned into double quotes
                    "{" + str(sorted(ans))[1:-1].replace(" ", "").replace("'", '"') + "}",
                ]
                print("\t".join(row))
示例#6
0
def test_precomputed_associations_counts(pqo_STRING, TaxIDs):
    """
    Compare precomputed background association data with a fresh count.

    For the given TaxID, the per-entity-type count and ANs dictionaries
    returned by query.get_association_2_count_ANs_background_split_by_entity
    (A) have to equal what ratio.count_terms_v3 yields for the whole proteome
    of that TaxID (B).

    :param pqo_STRING: PersistentQuery Object
    :param TaxIDs: a single TaxID
    """
    taxid = TaxIDs
    proteome = query.get_proteins_of_taxid(taxid)

    # A: precomputed values (the third element, the background size, is not checked)
    precomputed = query.get_association_2_count_ANs_background_split_by_entity(taxid)
    etype_2_count_dict_background, etype_2_ANs_dict_background, _ = precomputed

    etype_2_association_dict = pqo_STRING.get_association_dict_split_by_category(set(proteome))
    for etype in variables.entity_types_with_data_in_functions_table:
        # B: counted on the fly from the associations of the proteome
        counted = ratio.count_terms_v3(set(proteome), etype_2_association_dict[etype])
        association_2_count_dict, association_2_ANs_dict, _ = counted
        assert association_2_count_dict == etype_2_count_dict_background[etype]
        assert association_2_ANs_dict == etype_2_ANs_dict_background[etype]
示例#7
0

# UniProt_IDs_human_list = sorted(query.get_proteins_of_taxid(9606, read_from_flat_files=True))
# ENSP_human_list = sorted(query.get_proteins_of_human())
#
# @pytest.fixture(scope="session")
# def UniProt_IDs_human():
#     return UniProt_IDs_human_list
#
# @pytest.fixture(scope="session")
# def ENSPs_human():
#     return ENSP_human_list

### preloaded objects --> put into conftest.py?
# Sorted ID lists for human (TaxID 9606), built once when this module is
# imported. They are bound as default arguments of get_random_human_ENSP
# below, so they have to be defined before that function.
UniProt_IDs_human_list = sorted(
    query.get_proteins_of_taxid(9606, read_from_flat_files=True))
ENSP_human_list = sorted(query.get_proteins_of_human())
###


def get_random_human_ENSP(num_ENSPs=20,
                          joined_for_web=False,
                          contiguous=False,
                          UniProt_ID=False,
                          UniProt_IDs_human_list=UniProt_IDs_human_list,
                          ENSP_human_list=ENSP_human_list):
    if UniProt_ID:
        IDs_2_sample = UniProt_IDs_human_list
    else:
        IDs_2_sample = ENSP_human_list
    max_index = len(IDs_2_sample)