help="indent GO terms") (opts, args) = p.parse_args() bad = check_bad_args(args) if bad: print(bad) sys.exit(p.print_help()) min_ratio = opts.ratio if min_ratio is not None: assert 1 <= min_ratio <= 2 assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)" study_fn, pop_fn, assoc_fn = args study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare) assoc = read_associations(assoc_fn) methods = ["bonferroni", "sidak", "holm"] if opts.fdr: methods.append("fdr") obo_dag = GODag(obo_file="go-basic.obo") g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha, study=study, methods=methods) g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
# Load the study/population gene sets and the gene -> GO association table.
study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
assoc = read_associations(assoc_fn)

# Multiple-testing corrections to apply; "fdr" is added only on request.
methods = ["bonferroni", "sidak", "holm"]
if args.fdr:
    methods.append("fdr")

# Time how long it takes to obtain the GO DAG.  time.clock() was removed in
# Python 3.8; time.perf_counter() is its documented replacement for measuring
# elapsed intervals.
starttime = time.perf_counter()
# The DAG comes from read_data() rather than from parsing the OBO file, i.e.
# GODag(obo_file=args.obo).
# NOTE(review): read_data() presumably returns a DAG previously stored with
# save_data(obo_dag) -- confirm against their definitions.
obo_dag = read_data()
endtime = time.perf_counter()
# "with" guarantees the timing file is closed even if the write fails.
with open('E:/time.txt', 'w') as f:
    f.write(str(endtime - starttime))
print(str(endtime - starttime))

g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=args.alpha,
                      study=study, methods=methods)
# Second measurement: elapsed time from the same start point up to the end
# of GOEnrichmentStudy construction (DAG load included).
endtime = time.perf_counter()
with open('E:/time1.txt', 'w') as f:
    f.write(str(endtime - starttime))

g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)
action='store_true', help="Calculate the false discovery rate (alt. to the " "Bonferroni but slower)") p.add_option('--indent', dest='indent', default=False, action='store_true', help="indent GO terms") (opts, args) = p.parse_args() bad = check_bad_args(args) if bad: print bad sys.exit(p.print_help()) min_ratio = opts.ratio if min_ratio is not None: assert 1 <= min_ratio <= 2 assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)" study_fn, pop_fn, assoc_fn = args study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare) assoc = read_associations(assoc_fn) methods = ["bonferroni", "sidak", "holm"] if opts.fdr: methods.append("fdr") obo_dag = GODag(obo_file="gene_ontology.1_2.obo") g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha, study=study, methods=methods) g.print_summary(min_ratio=min_ratio, indent=opts.indent, pval=opts.pval)
p.add_option('--fdr', dest='fdr', default=False, action='store_true',
             help="calculate the false discovery rate (alternative to the Bonferroni correction)")
p.add_option('--indent', dest='indent', default=False, action='store_true',
             help="indent GO terms")

(opts, args) = p.parse_args()

# check_bad_args() returns a truthy message when the positional arguments
# are unusable; report it, show the usage text and stop.
bad = check_bad_args(args)
if bad:
    # Parenthesised form prints the same text on Python 2 and is valid on
    # Python 3 as well (the bare "print bad" statement is not).
    print(bad)
    sys.exit(p.print_help())

# Fall back to 0.05 when no alpha was supplied on the command line.
alpha = float(opts.alpha) if opts.alpha else 0.05

min_ratio = opts.ratio
if min_ratio is not None:
    assert 1 <= min_ratio <= 2

# Positional arguments: study file, population file, association file.
study_fn, pop_fn, assoc_fn = args
study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
assoc = read_associations(assoc_fn)

# Multiple-testing corrections to apply; "fdr" is added only on request.
methods = ["bonferroni", "sidak", "holm"]
if opts.fdr:
    methods.append("fdr")

obo_dag = GODag(obo_file="gene_ontology.1_2.obo")
g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=alpha,
                      study=study, methods=methods)
g.print_summary(min_ratio=min_ratio, indent=opts.indent)
study, pop = read_geneset(study_fn, pop_fn, compare=args.compare)
print("Study: {0} vs. Population {1}".format(len(study), len(pop)),
      file=sys.stderr)

if not args.compare:  # sanity check
    # NOTE(review): the nesting of the checks below under this "if" is
    # reconstructed from whitespace-collapsed text -- confirm.
    # sys.exit() is used instead of the bare exit() helper, which is injected
    # by the site module and is not available in every interpreter setup.
    if len(pop) < len(study):
        sys.exit("\nERROR: The study file contains more elements than the population file. "
                 "Please check that the study file is a subset of the population file.\n")
    # An empty study would make the overlap ratio below divide by zero;
    # fail with a readable message instead of a ZeroDivisionError traceback.
    if not study:
        sys.exit("\nERROR: The study file contains no elements. Please check.\n")
    # check the fraction of genomic ids that overlap between study
    # and population
    overlap = float(len(study & pop)) / len(study)
    if 0.7 < overlap < 0.95:
        sys.stderr.write("\nWARNING: only {} fraction of genes/proteins in study are found in "
                         "the population background.\n\n".format(overlap))
    if overlap <= 0.7:
        sys.exit("\nERROR: only {} of genes/proteins in the study are found in the "
                 "background population. Please check.\n".format(overlap))

assoc = read_associations(assoc_fn)

# Multiple-testing corrections to apply; "fdr" is added only on request.
methods = ["bonferroni", "sidak", "holm"]
if args.fdr:
    methods.append("fdr")

obo_dag = GODag(obo_file=args.obo)
# The command-line flag is negative (--no-propagate-counts style), so invert
# it for the positive keyword argument.
propagate_counts = not args.no_propagate_counts
g = GOEnrichmentStudy(pop, assoc, obo_dag,
                      propagate_counts=propagate_counts,
                      alpha=args.alpha,
                      study=study,
                      methods=methods)
g.print_summary(min_ratio=min_ratio, indent=args.indent, pval=args.pval)
'$in': list(forward_loci) }}, {'_id': True}) ] # One of Ana's Sample study_indexDB = '/home/gstupp/01_2015_mass_spec/H1_11082014/1108_Gly1_2014_12_15_15_29205/dtaselect_results_sfp0.01_p2/DTASelect-filter.txt' study_ps = build_proteins_from_peptides.main(study_indexDB) study_loci = set(chain(*[x['forward_loci'] for x in study_ps])) study = setup_study_pop(study_loci) # One of Sandip's microbiome samples pop_indexDB = '/home/gstupp/01_2015_mass_spec/120314_SC_sampleH1sol_HCD35/DTASelect-filter.txt' pop_ps = build_proteins_from_peptides.main(pop_indexDB) pop_loci = set(chain(*[x['forward_loci'] for x in pop_ps])) pop = setup_study_pop(study_loci and pop_loci) # set up hash -> GO matching assoc = setup_association(study_loci and pop_loci) obo_dag = GODag(obo_file=os.path.expanduser("~/go/go-basic.obo")) study_sub = study[:1000] g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=0.05, study=study, methods=["fdr"]) g.print_summary(min_ratio=None, indent=False, pval=None)
def _default_enrichment_path(study_fn):
    """Return ``study_fn`` with ``enrichment_`` prefixed to its file name.

    Only the final path component is touched; the directory part is kept
    exactly as given.  (A plain ``str.replace`` on the base name would also
    rewrite any directory that happens to contain the same text.)
    """
    import os  # local import: the file's import block is not visible here

    filename = os.path.basename(study_fn)
    directory = study_fn[:len(study_fn) - len(filename)]
    return directory + "enrichment_" + filename


def check_enrichment(study_fn, pop_fn, assoc_fn, print_summary=False,
                     save_summary=True, savepath=None, obo_dag=None):
    """Run a GO enrichment study and optionally print and/or save the results.

    The three input files are taken from the function arguments; tuning
    options (``--alpha``, ``--pval``, ``--compare``, ``--ratio``, ``--fdr``,
    ``--indent``) are still read from the process command line via optparse.

    Args:
        study_fn: Path of the study gene-set file.
        pop_fn: Path of the population gene-set file.
        assoc_fn: Path of the gene -> GO association file.
        print_summary: When true, print the result summary.
        save_summary: When true, write the results as TSV to ``savepath``.
        savepath: Output path for the TSV.  When ``None``, it defaults to the
            study file's path with ``enrichment_`` prefixed to the file name.
        obo_dag: Pre-built GO DAG.  When ``None``, one is loaded from
            ``go-basic.obo``.

    Returns:
        The results produced by ``GOEnrichmentStudy.run_study`` for the study
        set.

    Raises:
        AssertionError: If ``--ratio`` is outside [1, 2] or ``--alpha`` is
            outside (0, 1).
        SystemExit: If ``check_bad_args`` rejects the input files.
    """
    p = optparse.OptionParser(__doc__)
    p.add_option('--alpha', default=0.05, type="float",
                 help="Test-wise alpha for multiple testing "
                      "[default: %default]")
    p.add_option('--pval', default=None, type="float",
                 help="Family-wise alpha (whole experiment), only print out "
                      "Bonferroni p-value is less than this value. "
                      "[default: %default]")
    p.add_option('--compare', dest='compare', default=False,
                 action='store_true',
                 help="the population file as a comparison group. if this "
                      "flag is specified, the population is used as the study "
                      "plus the `population/comparison`")
    p.add_option('--ratio', dest='ratio', type='float', default=None,
                 help="only show values where the difference between study "
                      "and population ratios is greater than this. useful for "
                      "excluding GO categories with small differences, but "
                      "containing large numbers of genes. should be a value "
                      "between 1 and 2. ")
    p.add_option('--fdr', dest='fdr', default=False,
                 action='store_true',
                 help="Calculate the false discovery rate (alt. to the "
                      "Bonferroni but slower)")
    p.add_option('--indent', dest='indent', default=False,
                 action='store_true', help="indent GO terms")

    # Positional command-line arguments are ignored on purpose: the input
    # files come from this function's parameters.
    opts, _ = p.parse_args()

    bad = check_bad_args([study_fn, pop_fn, assoc_fn])
    if bad:
        print(bad)
        sys.exit(p.print_help())

    min_ratio = opts.ratio
    if min_ratio is not None:
        assert 1 <= min_ratio <= 2, "Ratio must fall between [1, 2]"

    assert 0 < opts.alpha < 1, "Test-wise alpha must fall between (0, 1)"

    study, pop = read_geneset(study_fn, pop_fn, compare=opts.compare)
    assoc = read_associations(assoc_fn)

    # Multiple-testing corrections to apply; "fdr" is added only on request.
    methods = ["bonferroni", "sidak", "holm"]
    if opts.fdr:
        methods.append("fdr")

    # Parsing the ontology is only needed when the caller did not pass a DAG.
    if obo_dag is None:
        obo_dag = GODag(obo_file="go-basic.obo")

    g = GOEnrichmentStudy(pop, assoc, obo_dag, alpha=opts.alpha,
                          methods=methods)
    results = g.run_study(study)

    if print_summary:
        g.print_summary(results, min_ratio=min_ratio, indent=opts.indent,
                        pval=opts.pval)

    if save_summary:
        if savepath is None:
            savepath = _default_enrichment_path(study_fn)
        g.wr_tsv(savepath, results)

    return results