def random_foreground_background():
    """Return a random (foreground, background, taxid) triple for enrichment tests.

    The background is the full proteome of a randomly chosen TaxID; the
    foreground is a random sample of 200 proteins drawn from that background.
    Up to 10 TaxIDs are tried, skipping proteomes with fewer than 200 proteins
    (``random.sample`` raises ValueError when the population is too small).

    :return: tuple(list of ENSPs, list of ENSPs, taxid)
    :raises ValueError: if no sufficiently large proteome is found in 10 tries
    """
    # used TaxIDs fixture previously, but now it is random on TaxID level as well
    for _ in range(10):
        taxid = random.choice(query.get_taxids())  # read_from_flat_files=True
        background = query.get_proteins_of_taxid(taxid)
        if len(background) < 200:
            # proteome too small to sample a 200-protein foreground --> retry
            continue
        foreground = random.sample(background, 200)
        return foreground, background, taxid
    raise ValueError("no TaxID with a proteome of >= 200 proteins found in 10 attempts")
def test_ENSP_consistency_of_DB():
    """ENSPs of taxid_2_protein_table must be a superset of ENSPs of protein_2_function_table.

    A foreground protein with a functional association also has to be present
    in the precomputed background:
        - TaxID_2_Protein_table_STRING: ENSPs expected to be the superset of
          Protein_2_Function_table_STRING
        - Protein_2_Function_table_STRING
        - Function_2_ENSP_table_STRING
    """
    for taxid in query.get_taxids():
        ensp_taxid_2_protein = set(query.get_proteins_of_taxid(taxid))
        # Fetch all ENSPs with functional annotations for this taxid; the
        # raw string avoids the invalid escape sequence '\.' (the regex needs
        # a literal dot after the taxid prefix).
        ensp_protein_2_function = {
            ele[0] for ele in query.get_results_of_statement(
                r"SELECT protein_2_function.an FROM protein_2_function WHERE protein_2_function.an ~ '^{}\.'".format(taxid))
        }
        # ensp_function_2_ensp = None
        len_ensp_taxid_2_protein = len(ensp_taxid_2_protein)
        len_ensp_protein_2_function = len(ensp_protein_2_function)
        # annotated proteins can never outnumber the proteome
        assert len_ensp_taxid_2_protein >= len_ensp_protein_2_function
        # every annotated ENSP is in the proteome (intersection == annotated set)
        assert len(ensp_taxid_2_protein.intersection(
            ensp_protein_2_function)) == len_ensp_protein_2_function
        # the proteome absorbs the annotated set (union == proteome)
        assert len(ensp_taxid_2_protein.union(
            ensp_protein_2_function)) == len_ensp_taxid_2_protein
def test_run_STRING_enrichment(pqo_STRING, STRING_examples, args_dict):
    """Run 'compare_samples' enrichment and check every entity type has results.

    :param pqo_STRING: PersistentQuery Object
    :param STRING_examples: tuple (foreground ENSPs, taxid)
    :param args_dict: dict (from conftest.py with default values)
    :return: None
    """
    enrichment_method = "compare_samples"
    foreground, taxid = STRING_examples
    background = query.get_proteins_of_taxid(taxid)
    request_args = args_dict.copy()
    request_args.update({
        "foreground": format_for_REST_API(foreground),
        "background": format_for_REST_API(background),
        "intensity": None,
        "enrichment_method": enrichment_method})
    ui = userinput.REST_API_input(pqo_STRING, request_args)
    request_args.update({
        "limit_2_entity_type": variables.limit_2_entity_types_ALL,
        "output_format": "json",
        "FDR_cutoff": None})
    results_all_function_types = run.run_STRING_enrichment(
        pqo=pqo_STRING, ui=ui, args_dict=request_args)
    # the backend signals failure via this sentinel dict rather than raising
    assert results_all_function_types != {'message': 'Internal Server Error'}
    etypes = variables.entity_types_with_data_in_functions_table
    # all expected entity types must be present among the result keys
    assert len(set(results_all_function_types.keys()).intersection(etypes)) == len(etypes)
    for result in results_all_function_types.values():
        # every entity type must yield a non-empty result
        assert result
def random_abundance_correction_foreground_background():
    """Return (foreground, background, intensity, taxid) for abundance-correction tests.

    The background is the full proteome of a randomly chosen TaxID, the
    foreground a random sample of 200 of its proteins, and intensity a list of
    normally-distributed values (as strings), one per background protein.
    Up to 10 TaxIDs are tried, skipping proteomes with fewer than 200 proteins
    (``random.sample`` raises ValueError when the population is too small).

    :return: tuple(list of ENSPs, list of ENSPs, list of str, taxid)
    :raises ValueError: if no sufficiently large proteome is found in 10 tries
    """
    for _ in range(10):
        taxid = random.choice(query.get_taxids())  # read_from_flat_files=True
        background = query.get_proteins_of_taxid(taxid)
        if len(background) < 200:
            # proteome too small to sample a 200-protein foreground --> retry
            continue
        foreground = random.sample(background, 200)
        intensity = [
            str(ele) for ele in np.random.normal(size=len(background))
        ]
        return foreground, background, intensity, taxid
    raise ValueError("no TaxID with a proteome of >= 200 proteins found in 10 attempts")
def create_functions_2_ENSP_table():
    """Print the Function_2_ENSP table as TSV rows, one per (taxid, etype, association).

    TaxIDs are read line-by-line via ``fileinput`` (stdin or file arguments).
    Each output row: taxid, entity type, association, per-association count,
    total annotated-protein counter, and the set of ENSPs formatted as
    ``{"ENSP1","ENSP2",...}``.
    """
    for line in fileinput.input():
        taxid = line.strip()
        ans_list = sorted(query.get_proteins_of_taxid(taxid))
        ans_set = set(ans_list)  # hoisted: reused for every entity type
        etype_2_association_dict = query.PersistentQueryObject_STRING.get_association_dict_split_by_category(ans_list)
        for etype in sorted(variables.entity_types_with_data_in_functions_table):
            assoc_dict = etype_2_association_dict[etype]
            association_2_count_dict, association_2_ANs_dict, ans_counter = count_terms_v3(ans_set, assoc_dict)
            for association, ans in association_2_ANs_dict.items():
                # a single association can never be counted more often than
                # the total number of annotated proteins
                assert ans_counter >= association_2_count_dict[association]
                # render the ENSP set as {"A","B",...} (list-repr reshaped)
                ans_field = "{" + str(sorted(ans))[1:-1].replace(" ", "").replace("'", '"') + "}"
                print("\t".join([str(taxid),
                                 str(etype),
                                 association,
                                 str(association_2_count_dict[association]),
                                 str(ans_counter),
                                 ans_field]))
def test_precomputed_associations_counts(pqo_STRING, TaxIDs):
    """Precomputed background counts (A) must equal counts recomputed on the fly (B)."""
    taxid = TaxIDs
    proteome = set(query.get_proteins_of_taxid(taxid))
    # A: precomputed per-entity-type background counts and ANs from the DB
    (etype_2_association_2_count_dict_background,
     etype_2_association_2_ANs_dict_background,
     etype_2_background_n) = query.get_association_2_count_ANs_background_split_by_entity(taxid)
    etype_2_association_dict = pqo_STRING.get_association_dict_split_by_category(proteome)
    for etype in variables.entity_types_with_data_in_functions_table:
        # B: recompute counts from the association dict for this entity type
        association_2_count_dict, association_2_ANs_dict, _ans_counter = ratio.count_terms_v3(
            proteome, etype_2_association_dict[etype])
        assert association_2_count_dict == etype_2_association_2_count_dict_background[etype]
        assert association_2_ANs_dict == etype_2_association_2_ANs_dict_background[etype]
# UniProt_IDs_human_list = sorted(query.get_proteins_of_taxid(9606, read_from_flat_files=True)) # ENSP_human_list = sorted(query.get_proteins_of_human()) # # @pytest.fixture(scope="session") # def UniProt_IDs_human(): # return UniProt_IDs_human_list # # @pytest.fixture(scope="session") # def ENSPs_human(): # return ENSP_human_list ### preloaded objects --> put into conftest.py? UniProt_IDs_human_list = sorted( query.get_proteins_of_taxid(9606, read_from_flat_files=True)) ENSP_human_list = sorted(query.get_proteins_of_human()) ### def get_random_human_ENSP(num_ENSPs=20, joined_for_web=False, contiguous=False, UniProt_ID=False, UniProt_IDs_human_list=UniProt_IDs_human_list, ENSP_human_list=ENSP_human_list): if UniProt_ID: IDs_2_sample = UniProt_IDs_human_list else: IDs_2_sample = ENSP_human_list max_index = len(IDs_2_sample)