def test_assc_stats(prt=sys.stdout):
    """Describe the GO DAG, then describe each organism's association file.

    Args:
        prt: Writable stream handed to every describe/print call.
    """
    # (organism code, association file name inside REPO)
    org2gaf = (
        ('hsa', 'goa_human.gaf'),  # human
        ('mus', 'mgi.gaf'),        # mouse
        ('dme', 'fb.gaf'),         # fly
    )
    fin_obo = os.path.join(REPO, "go-basic.obo")
    godag = get_godag(fin_obo, loading_bar=None)
    describe_go2obj(godag, prt)
    # One StatsDescribe object is shared so all organisms land under one header
    statsobj = StatsDescribe('Assc', "{:6,}")
    statsobj.prt_hdr(prt, "Assc.")
    for org, gaf in org2gaf:
        describe_assc(org, os.path.join(REPO, gaf), godag, statsobj, prt)
def test_assc_stats(prt=sys.stdout):
    """Print GO DAG statistics followed by one association report per organism.

    Args:
        prt: Writable stream that receives all printed output.
    """
    org_files = [
        ('hsa', 'goa_human.gaf'),  # human
        ('mus', 'mgi.gaf'),  # mouse
        ('dme', 'fb.gaf'),  # fly
    ]
    # Resolve every association file against REPO up front
    org_paths = [(org, os.path.join(REPO, fname)) for org, fname in org_files]

    dag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    describe_go2obj(dag, prt)

    describer = StatsDescribe('Assc', "{:6,}")
    describer.prt_hdr(prt, "Assc.")
    for org, fin_assc in org_paths:
        describe_assc(org, fin_assc, dag, describer, prt)
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Only records whose ``p_fdr_bh`` is below 0.05 are summarized.

    Example output:

    fdr_bh name | # fdr_bh | range of fdr_bh | 25th perc| median | 75th perc| mean | stddev
    ---------|----------|----------------------|----------|----------|----------|----------|---------
    GOATOOLS | 59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02
    """
    #pylint: disable=no-member
    # Records may carry an empty string in p_fdr_bh; comparing '' < 0.05 raises
    # TypeError on Python 3, so skip those records before the threshold test.
    # Inspect with: print([(nt.GO, nt.p_fdr_bh) for nt in nts])
    nts_goids = [nt for nt in nts if nt.p_fdr_bh != '' and nt.p_fdr_bh < 0.05]
    fdr_vals = [nt.p_fdr_bh for nt in nts_goids]
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_hdr()
    statsobj.prt_data("GOATOOLS", fdr_vals)
def test_statsdescribe():
    """Summarize significant fdr_bh values from goea_results as a markdown table.

    Example output:

    fdr_bh name | # fdr_bh | range of fdr_bh | 25th perc| median | 75th perc| mean | stddev
    ---------|----------|----------------------|----------|----------|----------|----------|---------
    GOATOOLS | 59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02
    """
    #pylint: disable=no-member
    # goea_results can contain records whose p_fdr_bh is an empty string.
    # To see them: print([(nt.GO, nt.p_fdr_bh) for nt in goea_results])
    significant = []
    for rec in goea_results:
        # Empty-string check comes first so '' is never compared to a float
        if rec.p_fdr_bh != '' and rec.p_fdr_bh < 0.05:
            significant.append(rec)
    fdr_sig = [rec.p_fdr_bh for rec in significant]

    table = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    table.prt_hdr()
    table.prt_data("GOATOOLS", fdr_sig)
def describe_go2obj(go2obj, prt):
    """Print the distribution of parent counts and child counts over GO terms.

    Args:
        go2obj: Mapping of GO ID to term object; each term exposes ``id``,
            ``children`` and ``parents``.
        prt: Writable stream that receives the table.

    Example output:

    Related GO | # GO  | range    | 25th | median | 75th | mean | stddev
    -----------|-------|----------|------|--------|------|------|-------
    Parents    | 44961 | 0 to 8   | 1    | 1      | 2    | 2    | 1
    Children   | 17597 | 1 to 480 | 1    | 2      | 4    | 4    | 10
    """
    # Keep only entries keyed by the term's own ID
    primary_terms = [term for goid, term in go2obj.items() if goid == term.id]
    num_children, num_parents = zip(
        *((len(term.children), len(term.parents)) for term in primary_terms))
    # Remove leaf-level counts (no children) from reported stats
    nonleaf_children = [cnt for cnt in num_children if cnt != 0]
    # Remove top-level counts (no parents) from reported stats
    nonroot_parents = [cnt for cnt in num_parents if cnt != 0]

    statsobj = StatsDescribe('GO', "{:6,}")
    statsobj.prt_hdr(prt, "Related GO")
    statsobj.prt_data("Parents", nonroot_parents, prt)
    statsobj.prt_data("Children", nonleaf_children, prt)
def prt_experiments_stats(self, prt=sys.stdout, attrs=None, genes_goids='genes'):
    """Print one statistics table per attribute, one row per experiment set.

    Args:
        prt: Writable stream that receives the tables.
        attrs: Attribute names to report; when None, a default list of five
            attributes is used.
        genes_goids: Forwarded to each experiment set's ``get_means``.
    """
    attrnames = attrs if attrs is not None else [
        "fdr_actual", "frr_actual", "num_Type_I", "num_Type_II", "num_correct"]
    # Header for col0, the description of the statistic
    hdrexps = "Nul(% max) #pval #tests"
    namefmt = "{PERCNULL:3}% {EXP_ALPHA:5.3f} {QTY:5}"
    for attrname in attrnames:
        prt.write("\n{ATTR} statistics:\n".format(ATTR=attrname))
        # Attributes whose name begins with "num" get a wider, 2-decimal format
        if attrname[:3] == "num":
            valfmt = "{:10.2f}"
        else:
            valfmt = "{:6.4f}"
        objstat = StatsDescribe("exps", valfmt)
        objstat.prt_hdr(prt, hdrexps)
        for expset in self.expsets:  # ExperimentSet
            rowname = expset.get_desc(namefmt)
            objstat.prt_data(rowname, expset.get_means(attrname, genes_goids), prt)
def test_statsdescribe():
    """Print a markdown table describing the significant fdr_bh values in nts.

    Example output:

    fdr_bh name | # fdr_bh | range of fdr_bh | 25th perc| median | 75th perc| mean | stddev
    ---------|----------|----------------------|----------|----------|----------|----------|---------
    GOATOOLS | 59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02
    """
    #pylint: disable=no-member
    # Some records in nts hold an empty string in p_fdr_bh; list them with:
    #     print([(nt.GO, nt.p_fdr_bh) for nt in nts])
    fdr_all = (nt.p_fdr_bh for nt in nts)
    # Drop empty strings before applying the 0.05 threshold
    fdr_sig = [fdr for fdr in fdr_all if fdr != '' and fdr < 0.05]
    describer = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    describer.prt_hdr()
    describer.prt_data("GOATOOLS", fdr_sig)
def main(prt=sys.stdout):
    """Statistics for the protein-coding mouse gene association.

    Prints three things to ``prt``: the distribution of genes per GO, the
    distribution of GOs per gene, and the percentage of population genes that
    have an association.

    Args:
        prt: Writable stream that receives the report.
    """
    godag = get_godag()
    params = {
        'association_file': os.path.join(REPO, 'gene_association.mgi'),
        'genes_population': ensm2nt.keys(),  # Population genes
    }
    objassc = DataAssc(params, godag)

    def _prt_counts(item_name, row_name, key2items):
        """Print a one-row table describing the item count under each key."""
        key2num = {key: len(items) for key, items in key2items.items()}
        objdesc = StatsDescribe(item_name, "{:>5.0f}")
        objdesc.prt_hdr(prt, name="\nname ")
        objdesc.prt_data(row_name, key2num.values(), prt)

    # Number of genes per GO in the mouse association for protein-coding genes
    _prt_counts("GOs", "# genes/GO", objassc.go2genes)
    # Number of GOs per gene in the mouse association for protein-coding genes
    _prt_counts("genes", "# GOs/gene", objassc.objassc_all.assc_geneid2gos)

    # Percentage of Ensembl mouse genes covered by GO annotations
    num_pc = len(params['genes_population'])
    num_assc = len(objassc.objassc_all.assc_geneid2gos)
    msg = "{PERC:2.0f}% of {A} of {P} Mouse protein-coding genes are annotated by GO IDs.\n"
    prt.write(msg.format(PERC=100.0 * num_assc / num_pc, P=num_pc, A=num_assc))