示例#1
0
                 help="indent GO terms")

    (opts, args) = p.parse_args()
    bad = check_bad_args(args)
    if bad:
        print(bad)
        sys.exit(p.print_help())

    min_ratio = opts.ratio
    if min_ratio is not None:
        assert 1 <= min_ratio <= 2

    assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"

    study_fn, pop_fn, assoc_fn = args
    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
    assoc = read_associations(assoc_fn)

    methods = ["bonferroni", "sidak", "holm"]
    if opts.fdr:
        methods.append("fdr")

    obo_dag = GODag(obo_file="go-basic.obo")
    g = GOEnrichmentStudy(pop,
                          assoc,
                          obo_dag,
                          alpha=opts.alpha,
                          study=study,
                          methods=methods)
    g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
示例#2
0
    study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
    assoc = read_associations(assoc_fn)

    methods = ["bonferroni", "sidak", "holm"]
    if args.fdr:
        methods.append("fdr")

    starttime = time.clock()
    # obo_dag = GODag(obo_file=args.obo)
    obo_dag = read_data()
    endtime = time.clock()
    f = open('E:/time.txt', 'w')
    f.write(str(endtime - starttime))
    f.close()
    print(str(endtime - starttime))

    # save_data(obo_dag)
    # obo_dag=read_data()
    g = GOEnrichmentStudy(pop,
                          assoc,
                          obo_dag,
                          alpha=args.alpha,
                          study=study,
                          methods=methods)
    endtime = time.clock()
    f = open('E:/time1.txt', 'w')
    f.write(str(endtime - starttime))
    f.close()
    # print (endtime - starttime)
    g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)
示例#3
0
                 action='store_true',
                 help="Calculate the false discovery rate (alt. to the "
                 "Bonferroni but slower)")
    p.add_option('--indent', dest='indent', default=False,
                 action='store_true', help="indent GO terms")

    (opts, args) = p.parse_args()
    bad = check_bad_args(args)
    if bad:
        print bad
        sys.exit(p.print_help())

    min_ratio = opts.ratio
    if min_ratio is not None:
        assert 1 <= min_ratio <= 2

    assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"

    study_fn, pop_fn, assoc_fn = args
    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
    assoc = read_associations(assoc_fn)

    methods = ["bonferroni", "sidak", "holm"]
    if opts.fdr:
        methods.append("fdr")

    obo_dag = GODag(obo_file="gene_ontology.1_2.obo")
    g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha,
                          study=study, methods=methods)
    g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
示例#4
0
    p.add_option('--fdr', dest='fdr', default=False,
                action='store_true',
                help="calculate the false discovery rate (alternative to the Bonferroni correction)")
    p.add_option('--indent', dest='indent', default=False,
                action='store_true', help="indent GO terms")

    (opts, args) = p.parse_args()
    bad = check_bad_args(args)
    if bad:
        print bad
        sys.exit(p.print_help())

    alpha = float(opts.alpha) if opts.alpha else 0.05

    min_ratio = opts.ratio
    if not min_ratio is None:
        assert 1 <= min_ratio <= 2

    study_fn, pop_fn, assoc_fn = args
    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
    assoc = read_associations(assoc_fn)

    methods=["bonferroni", "sidak", "holm"]
    if opts.fdr:
        methods.append("fdr")

    obo_dag = GODag(obo_file="gene_ontology.1_2.obo")
    g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=alpha, study=study, methods=methods)
    g.print_summary(min_ratio=min_ratio, indent=opts.indent)

示例#5
0
    study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
    print("Study: {0} vs. Population {1}".format(len(study), len(pop)), file=sys.stderr)

    if not args.compare:  # sanity check
        if len(pop) < len(study):
            exit("\nERROR: The study file contains more elements than the population file. "
                 "Please check that the study file is a subset of the population file.\n")
        # check the fraction of genomic ids that overlap between study
        # and population
        overlap = float(len(study & pop)) / len(study)
        if 0.7 < overlap < 0.95:
            sys.stderr.write("\nWARNING: only {} fraction of genes/proteins in study are found in "
                             "the population  background.\n\n".format(overlap))
        if overlap <= 0.7:
            exit("\nERROR: only {} of genes/proteins in the study are found in the "
                 "background population. Please check.\n".format(overlap))

    assoc = read_associations(assoc_fn)

    methods = ["bonferroni", "sidak", "holm"]
    if args.fdr:
        methods.append("fdr")

    obo_dag = GODag(obo_file=args.obo)
    propagate_counts = not args.no_propagate_counts
    g = GOEnrichmentStudy(pop, assoc, obo_dag,
                          propagate_counts=propagate_counts,
                          alpha=args.alpha,
                          study=study, methods=methods)
    g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)
示例#6
0
            '$in': list(forward_loci)
        }}, {'_id': True})
    ]


# Study set: one of Ana's samples.
# The DTASelect filter file is turned into protein records, and every record's
# 'forward_loci' entries are flattened into a single set of loci.
study_indexDB = '/home/gstupp/01_2015_mass_spec/H1_11082014/1108_Gly1_2014_12_15_15_29205/dtaselect_results_sfp0.01_p2/DTASelect-filter.txt'
study_ps = build_proteins_from_peptides.main(study_indexDB)
study_loci = set(chain(*[x['forward_loci'] for x in study_ps]))
study = setup_study_pop(study_loci)

# Population (background) set: one of Sandip's microbiome samples, loaded the
# same way as the study set above.
pop_indexDB = '/home/gstupp/01_2015_mass_spec/120314_SC_sampleH1sol_HCD35/DTASelect-filter.txt'
pop_ps = build_proteins_from_peptides.main(pop_indexDB)
pop_loci = set(chain(*[x['forward_loci'] for x in pop_ps]))
# NOTE(review): `study_loci and pop_loci` is a boolean `and`, not a set
# operation. It evaluates to `pop_loci` whenever `study_loci` is non-empty and
# to the empty `study_loci` otherwise. If a union (`|`) or intersection (`&`)
# of the two sets was intended, this needs changing -- confirm with the author.
pop = setup_study_pop(study_loci and pop_loci)

# set up hash -> GO matching
# NOTE(review): same boolean `and` as in the population setup; in practice
# only `pop_loci` is passed here when `study_loci` is non-empty.
assoc = setup_association(study_loci and pop_loci)

# Load the GO ontology from the user's home directory.
obo_dag = GODag(obo_file=os.path.expanduser("~/go/go-basic.obo"))

# NOTE(review): `study_sub` is never used below; the full `study` is what gets
# passed to GOEnrichmentStudy. Presumably a leftover from a quick test run.
study_sub = study[:1000]
# Run the enrichment with only the "fdr" multiple-testing method at
# alpha=0.05, then print every result (no ratio or p-value filtering).
g = GOEnrichmentStudy(pop,
                      assoc,
                      obo_dag,
                      alpha=0.05,
                      study=study,
                      methods=["fdr"])
g.print_summary(min_ratio=None, indent=False, pval=None)
示例#7
0
def check_enrichment(study_fn,
                     pop_fn,
                     assoc_fn,
                     print_summary=False,
                     save_summary=True,
                     savepath=None,
                     obo_dag=None):
    """Run a GO enrichment study on three input files and report the results.

    The file names come from the function arguments, but the tuning options
    (``--alpha``, ``--pval``, ``--compare``, ``--ratio``, ``--fdr``,
    ``--indent``) are still parsed from the process command line with
    ``optparse``; any positional command-line arguments are discarded.

    Args:
        study_fn: Study file name, passed to ``read_geneset``.
        pop_fn: Population file name, passed to ``read_geneset``.
        assoc_fn: Association file name, passed to ``read_associations``.
        print_summary: When true, print the results via ``g.print_summary``.
        save_summary: When true, write the results via ``g.wr_tsv``.
        savepath: Output path used when ``save_summary`` is true. When
            ``None``, the study file's last "/"-separated component is
            prefixed with ``"enrichment_"`` and the directory part is kept.
        obo_dag: An already-loaded GO DAG. When ``None``, one is loaded from
            the relative path ``"go-basic.obo"``.

    Returns:
        None. Results are printed and/or written to ``savepath``.

    Raises:
        AssertionError: If ``--ratio`` is outside [1, 2] or ``--alpha`` is
            outside (0, 1).
        SystemExit: If ``check_bad_args`` reports a problem with the file
            names (the message and the option help are printed first).
    """
    p = optparse.OptionParser(__doc__)

    p.add_option('--alpha',
                 default=0.05,
                 type="float",
                 help="Test-wise alpha for multiple testing "
                 "[default: %default]")
    p.add_option('--pval',
                 default=None,
                 type="float",
                 help="Family-wise alpha (whole experiment), only print out "
                 "Bonferroni p-value is less than this value. "
                 "[default: %default]")
    p.add_option('--compare',
                 dest='compare',
                 default=False,
                 action='store_true',
                 help="the population file as a comparison group. if this "
                 "flag is specified, the population is used as the study "
                 "plus the `population/comparison`")
    p.add_option('--ratio',
                 dest='ratio',
                 type='float',
                 default=None,
                 help="only show values where the difference between study "
                 "and population ratios is greater than this. useful for "
                 "excluding GO categories with small differences, but "
                 "containing large numbers of genes. should be a value "
                 "between 1 and 2. ")
    p.add_option('--fdr',
                 dest='fdr',
                 default=False,
                 action='store_true',
                 help="Calculate the false discovery rate (alt. to the "
                 "Bonferroni but slower)")
    p.add_option('--indent',
                 dest='indent',
                 default=False,
                 action='store_true',
                 help="indent GO terms")

    # Only the options are taken from the command line; the positional
    # arguments are replaced by the file names given to this function.
    (opts, args) = p.parse_args()
    args = [study_fn, pop_fn, assoc_fn]
    bad = check_bad_args(args)
    if bad:
        print(bad)
        sys.exit(p.print_help())

    min_ratio = opts.ratio
    if min_ratio is not None:
        assert 1 <= min_ratio <= 2

    assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"

    study_fn, pop_fn, assoc_fn = args
    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
    assoc = read_associations(assoc_fn)
    methods = ["bonferroni", "sidak", "holm"]
    if opts.fdr:
        methods.append("fdr")
    if obo_dag is None:
        obo_file = "go-basic.obo"
        obo_dag = GODag(obo_file=obo_file)
    g = GOEnrichmentStudy(pop,
                          assoc,
                          obo_dag,
                          alpha=opts.alpha,
                          methods=methods)

    results = g.run_study(study)

    if print_summary:
        g.print_summary(results,
                        min_ratio=min_ratio,
                        indent=opts.indent,
                        pval=opts.pval)

    if save_summary:
        if savepath is None:
            # Prefix only the final path component. A plain str.replace() on
            # the basename would also rewrite any directory that happens to
            # share that name (e.g. "study/study"), and would mangle the
            # whole path when the basename is empty (trailing "/").
            directory, sep, basename = study_fn.rpartition("/")
            savepath = directory + sep + "enrichment_" + basename
        g.wr_tsv(savepath, results)