示例#1
0
def test_assc_stats(prt=sys.stdout):
    """Print GO DAG relationship counts, then association stats per organism.

    The GO DAG is loaded from go-basic.obo under REPO and described with
    describe_go2obj. One 'Assc' header is then written, and describe_assc
    is called once for each (organism, association file) pair.
    """
    # (organism code, association file name under REPO)
    org2gaf = (
        ('hsa', 'goa_human.gaf'),  # human
        ('mus', 'mgi.gaf'),        # mouse
        ('dme', 'fb.gaf'),         # fly
    )
    fin_obo = os.path.join(REPO, "go-basic.obo")
    godag = get_godag(fin_obo, loading_bar=None)
    describe_go2obj(godag, prt)
    # A single StatsDescribe object is shared by all organisms
    statsobj = StatsDescribe('Assc', "{:6,}")
    statsobj.prt_hdr(prt, "Assc.")
    for org, gaf in org2gaf:
        describe_assc(org, os.path.join(REPO, gaf), godag, statsobj, prt)
示例#2
0
def test_assc_stats(prt=sys.stdout):
    """Report statistics for the GO DAG and for three association files.

    Output is written to prt: first the parent/child GO term counts from
    describe_go2obj, then an 'Assc.' header followed by whatever
    describe_assc prints for each organism.
    """
    human = ('hsa', 'goa_human.gaf')
    mouse = ('mus', 'mgi.gaf')
    fly = ('dme', 'fb.gaf')
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    describe_go2obj(godag, prt)
    assc_stats = StatsDescribe('Assc', "{:6,}")
    assc_stats.prt_hdr(prt, "Assc.")
    for organism, basename in [human, mouse, fly]:
        # Association files live next to the obo file, under REPO
        path_assc = os.path.join(REPO, basename)
        describe_assc(organism, path_assc, godag, assc_stats, prt)
示例#3
0
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Only records whose p_fdr_bh is below 0.05 contribute to the table.
    Example output:

fdr_bh
name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
---------|----------|----------------------|----------|----------|----------|----------|---------
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # A record's p_fdr_bh can be an empty string. Comparing '' with a float
    # raises TypeError in Python 3, so skip those records before thresholding.
    fdr_vals = [nt.p_fdr_bh for nt in nts if nt.p_fdr_bh != '' and nt.p_fdr_bh < 0.05]
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_hdr()
    statsobj.prt_data("GOATOOLS", fdr_vals)
示例#4
0
def test_statsdescribe():
    """Print a markdown table describing the significant fdr_bh values.

    Values are taken from goea_results; only those below 0.05 are kept.
    Example output:

fdr_bh
name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
---------|----------|----------------------|----------|----------|----------|----------|---------
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # Some records in goea_results carry an empty string in p_fdr_bh.
    # To inspect them:
    #     print([(nt.GO, nt.p_fdr_bh) for nt in goea_results])
    fdr_vals = []
    for nt in goea_results:
        fdr = nt.p_fdr_bh
        if fdr == '':
            continue  # No value to compare against the threshold
        if fdr < 0.05:
            fdr_vals.append(fdr)
    table = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    table.prt_hdr()
    table.prt_data("GOATOOLS", fdr_vals)
示例#5
0
def describe_go2obj(go2obj, prt):
    """Print statistics on how many parents and children the GO terms have.

    Entries whose key differs from the object's own id are skipped.
    Zero counts are left out of the statistics, so terms without parents
    do not affect the "Parents" row and terms without children do not
    affect the "Children" row.
    """
    # Example output:
    #
    # Related GO | # GO  | range    | 25th | median | 75th | mean | stddev
    # -----------|-------|----------|------|--------|------|------|-------
    # Parents    | 44961 | 0 to   8 |    1 |      1 |    2 |    2 |      1
    # Children   | 17597 | 1 to 480 |    1 |      2 |    4 |    4 |     10
    terms = [goobj for goid, goobj in go2obj.items() if goid == goobj.id]
    child_parent_pairs = [(len(term.children), len(term.parents)) for term in terms]
    num_children, num_parents = zip(*child_parent_pairs)
    # Leaf-level terms have no children; top-level terms have no parents
    num_children = [cnt for cnt in num_children if cnt]
    num_parents = [cnt for cnt in num_parents if cnt]
    statsobj = StatsDescribe('GO', "{:6,}")
    statsobj.prt_hdr(prt, "Related GO")
    statsobj.prt_data("Parents", num_parents, prt)
    statsobj.prt_data("Children", num_children, prt)
示例#6
0
def describe_go2obj(go2obj, prt):
    """Write a table of parent and child counts for the GO terms to prt.

    A term is counted only under the key that matches its own id. Counts of
    zero are dropped before the statistics are computed.
    """
    # Sample table:
    #
    # Related GO | # GO  | range    | 25th | median | 75th | mean | stddev
    # -----------|-------|----------|------|--------|------|------|-------
    # Parents    | 44961 | 0 to   8 |    1 |      1 |    2 |    2 |      1
    # Children   | 17597 | 1 to 480 |    1 |      2 |    4 |    4 |     10
    pairs = []
    for goid, term in go2obj.items():
        if goid == term.id:
            pairs.append((len(term.children), len(term.parents)))
    children_all, parents_all = zip(*pairs)
    # filter(None, ...) discards the zero counts
    children = list(filter(None, children_all))  # No leaf-level counts
    parents = list(filter(None, parents_all))    # No top-level counts
    stats = StatsDescribe('GO', "{:6,}")
    stats.prt_hdr(prt, "Related GO")
    for rowname, counts in (("Parents", parents), ("Children", children)):
        stats.prt_data(rowname, counts, prt)
示例#7
0
 def prt_experiments_stats(self, prt=sys.stdout, attrs=None, genes_goids='genes'):
     """Write one statistics table per attribute, one row per experiment set.

     prt: stream the tables are written to.
     attrs: attribute names to report; a default list is used when None.
     genes_goids: passed through to each experiment set's get_means.
     """
     attrnames = attrs
     if attrnames is None:
         attrnames = ["fdr_actual", "frr_actual", "num_Type_I", "num_Type_II", "num_correct"]
     # Column 0 holds a description of each experiment set
     col0_hdr = "Nul(% max) #pval #tests"
     desc_fmt = "{PERCNULL:3}% {EXP_ALPHA:5.3f} {QTY:5}"
     for attrname in attrnames:
         prt.write("\n{ATTR} statistics:\n".format(ATTR=attrname))
         # Attributes starting with "num" get a different number format
         if attrname[:3] == "num":
             valfmt = "{:10.2f}"
         else:
             valfmt = "{:6.4f}"
         stats = StatsDescribe("exps", valfmt)
         stats.prt_hdr(prt, col0_hdr)
         for expset in self.expsets:  # ExperimentSet
             rowname = expset.get_desc(desc_fmt)
             stats.prt_data(rowname, expset.get_means(attrname, genes_goids), prt)
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Only records whose p_fdr_bh is below 0.05 contribute to the table.
    Example output:

fdr_bh
name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
---------|----------|----------------------|----------|----------|----------|----------|---------
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # A record's p_fdr_bh can be an empty string. Comparing '' with a float
    # raises TypeError in Python 3, so skip those records before thresholding.
    fdr_vals = [nt.p_fdr_bh for nt in nts if nt.p_fdr_bh != '' and nt.p_fdr_bh < 0.05]
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_hdr()
    statsobj.prt_data("GOATOOLS", fdr_vals)
示例#9
0
def test_statsdescribe():
    """Print a markdown table describing the significant fdr_bh values.

    Values are taken from nts; only those below 0.05 are kept.
    Example output:

fdr_bh
name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
---------|----------|----------------------|----------|----------|----------|----------|---------
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # Some records in nts carry an empty string in p_fdr_bh. To inspect them:
    #     print([(nt.GO, nt.p_fdr_bh) for nt in nts])
    fdr_present = (nt.p_fdr_bh for nt in nts if nt.p_fdr_bh != '')
    fdr_vals = [fdr for fdr in fdr_present if fdr < 0.05]
    table = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    table.prt_hdr()
    table.prt_data("GOATOOLS", fdr_vals)
示例#10
0
def main(prt=sys.stdout):
    """Print statistics for the protein-coding mouse gene association.

    Three items are written to prt: the distribution of genes per GO, the
    distribution of GOs per gene, and the percentage of population genes
    (the keys of ensm2nt) that have at least one GO annotation.
    """
    godag = get_godag()
    fin_assc = os.path.join(REPO, 'gene_association.mgi')
    population = ensm2nt.keys()  # Population genes
    params = {
        'association_file': fin_assc,
        'genes_population': population,
    }
    objassc = DataAssc(params, godag)
    hdr_name = "\nname      "

    # Number of genes per GO in the mouse association for protein-coding genes
    go2numgenes = {goid: len(geneids) for goid, geneids in objassc.go2genes.items()}
    stats_gos = StatsDescribe("GOs", "{:>5.0f}")
    stats_gos.prt_hdr(prt, name=hdr_name)
    stats_gos.prt_data("# genes/GO", go2numgenes.values(), prt)

    # Number of GOs per gene in the mouse association for protein-coding genes
    geneid2gos = objassc.objassc_all.assc_geneid2gos
    gene2numgos = {geneid: len(goids) for geneid, goids in geneid2gos.items()}
    stats_genes = StatsDescribe("genes", "{:>5.0f}")
    stats_genes.prt_hdr(prt, name=hdr_name)
    stats_genes.prt_data("# GOs/gene", gene2numgos.values(), prt)

    # Percentage of Ensembl mouse genes covered by GO annotations
    num_pc = len(population)
    num_assc = len(objassc.objassc_all.assc_geneid2gos)
    msg = "{PERC:2.0f}% of {A} of {P} Mouse protein-coding genes are annotated by GO IDs.\n"
    prt.write(msg.format(PERC=100.0 * num_assc / num_pc, P=num_pc, A=num_assc))