def linage_specific_genes(taxonomy, th_max, th_min):
    """Return the core genes of ``taxonomy`` that no same-rank taxon shares.

    The rank of ``taxonomy`` is the first entry of the module-level ``ranks``
    whose ``get_taxonomy_list`` result contains it.  The core genes of
    ``taxonomy`` (``list_core_gene(taxonomy, th_max, th_min)``) are then
    reduced by the core genes of every other taxon listed for that rank.

    Args:
        taxonomy: Name of the taxon to analyse.
        th_max: Forwarded unchanged to ``list_core_gene``.
        th_min: Forwarded unchanged to ``list_core_gene``.

    Returns:
        set: Genes that are core for ``taxonomy`` and for none of the other
        taxa of the same rank.

    Raises:
        IndexError: If ``taxonomy`` is not listed under any rank.
    """
    # Stop at the first rank that lists the taxon instead of querying every
    # rank and then discarding all matches but the first.
    for rank in ranks:
        members = get_taxonomy_list(rank)
        if taxonomy in members:
            break
    else:
        # Same exception type the former ``[...][0]`` lookup raised, but with
        # a message that says what actually went wrong.
        raise IndexError(
            "taxonomy {!r} was not found at any rank".format(taxonomy)
        )

    own_core = set(list_core_gene(taxonomy, th_max, th_min))

    # Build a new list rather than calling ``.remove()`` on the helper's
    # return value, so the caller-visible list is never mutated; this also
    # drops every occurrence of the taxon, not only the first one.
    siblings = [name for name in members if name != taxonomy]

    sibling_core = set()
    for sibling in siblings:
        sibling_core.update(list_core_gene(sibling, th_max, th_min))

    return own_core - sibling_core
def main1():
    """Print one list of core-gene counts per rank.

    For every entry of the module-level ``ranks``, the taxa returned by
    ``get_taxonomy_list`` are restricted to those also present in ``dedup``;
    ``count_core_gene(taxon, 1, 0.98)`` is evaluated for each remaining taxon
    and the resulting list is printed.
    """
    for rank in ranks:
        selected = [name for name in get_taxonomy_list(rank) if name in dedup]

        # NOTE(review): 1 and 0.98 are presumably the upper/lower thresholds
        # expected by count_core_gene -- confirm against its definition.
        counts = []
        for name in selected:
            counts.append(count_core_gene(name, 1, 0.98))

        print(counts)
from Taxid_Taxonomy import get_taxonomy_list

# Taxon names to consider, one per line of ``dedup.txt`` (newline stripped).
# The ``with`` block guarantees the handle is closed even if reading fails.
with open("dedup.txt") as h:
    dedup = [line.rstrip("\n") for line in h]

# Rank names in the order they are scanned.
ranks = ["Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"]


def _rank_by_taxon(taxa_names, rank_names):
    """Map each name in ``taxa_names`` to the rank whose taxonomy list has it.

    Ranks are visited in the order given; when a name is listed under more
    than one rank, the later rank overwrites the earlier one.

    Args:
        taxa_names: Iterable of taxon names to classify.
        rank_names: Iterable of rank names passed to ``get_taxonomy_list``.

    Returns:
        dict: ``{taxon name: rank name}`` for every name found under a rank.
    """
    mapping = {}
    for rank in rank_names:
        # Fetch the list once per rank rather than once per (rank, taxon).
        members = get_taxonomy_list(rank)
        for taxa in taxa_names:
            if taxa in members:
                mapping[taxa] = rank
    return mapping


# Taxon name -> rank name, for every name in ``dedup`` found under a rank.
dic = _rank_by_taxon(dedup, ranks)


def taxon(rank):
    """Return the taxon names in ``dic`` whose recorded rank equals ``rank``.

    Args:
        rank: Rank name to select, compared with ``==`` to the values of
            ``dic``.

    Returns:
        list: Matching taxon names, in ``dic`` iteration order.
    """
    return [name for name, assigned in dic.items() if assigned == rank]