def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) trdata = None tedata = None tfids = load_tfids(args) for tfid in tfids: if "calib" in args.steps: print "-------------- calib:", tfid, "--------------" trdata = load_traindata(tfid, args) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib) if "train" in args.steps: print "-------------- train:", tfid, "--------------" trdata = load_traindata(tfid, args) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial) if "test" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_metrics(tedata, "test", args.finaldir) if "report" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_featuremaps(tedata, args.finaldir, args.reportdir, maxrows=100000) util.save_report(args.finaldir, args.reportdir, tfids)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) tfgroups = load_tfgroups(args) util.globals.flags.push("normalize_targets", True) for tfgroup in tfgroups: trdata = None if len(tfgroup["ids"]) == 0: print "No TFs to train on microarray %s"%tfgroup["train_fold"] continue if "calib" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True) if "train" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial, metric_key="pearson.r") for tfgroup in tfgroups: tedata = None newids = [] for id in tfgroup["ids"]: if os.path.exists(args.outdir+"/final/"+id+"/model.pkl"): newids.append(id) else: print "WARNING: did not find model for %s, skipping" % id tfgroup["ids"] = newids if len(tfgroup["ids"]) == 0: print "No TFs to test on microarray %s"%tfgroup["train_fold"] continue if "test" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args) if "report" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) util.save_featuremaps(tedata, args.finaldir, args.reportdir) if "report" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[]) save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson") save_pbm_performance_table(args, all_tfids) if "chip" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[]) save_chip_performance_table(args, all_tfids)
def save_pfms(args): maxrows = 10000 if args.quick else None chunktargets, chunkcols = get_chunktargets(args) print "Loading PBM data...", if args.mode == "A": testfold = "B" elif args.mode == "B": testfold = "A" else: testfold = "AB" pbmdata = util.datasource.fromtxt("../data/rnac/sequences.tsv.gz", None, "../data/rnac/targets.tsv.gz", targetcols=chunkcols, foldfilter=testfold, maxrows=maxrows) print "done" util.save_featuremaps(pbmdata, args.finaldir, args.reportdir)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) trdata = None tedata = None tfids = load_tfids(args) for tfid in tfids: if "calib" in args.steps: print "-------------- calib:", tfid, "--------------" set_motif_lengths(args, models, tfid) trdata = load_traindata(tfid, args) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=False) if "train" in args.steps: print "-------------- train:", tfid, "--------------" set_motif_lengths(args, models, tfid) trdata = load_traindata(tfid, args) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial) if "test" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_metrics(tedata, "test", args.finaldir) if "report" in args.steps: tedata = load_testdata(tedata, tfids, args) util.save_featuremaps(tedata, args.finaldir, args.reportdir) util.save_report(args.finaldir, args.reportdir, tfids)
def main(): util.enable_reversecomplement() args = loadargs() models = loadmodels(args) tfgroups = load_tfgroups(args) util.globals.flags.push("normalize_targets", True) for tfgroup in tfgroups: trdata = None if len(tfgroup["ids"]) == 0: print "No TFs to train on microarray %s" % tfgroup["train_fold"] continue if "calib" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True) if "train" in args.steps: trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True) util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial, metric_key="pearson.r") for tfgroup in tfgroups: tedata = None newids = [] for id in tfgroup["ids"]: if os.path.exists(args.outdir + "/final/" + id + "/model.pkl"): newids.append(id) else: print "WARNING: did not find model for %s, skipping" % id tfgroup["ids"] = newids if len(tfgroup["ids"]) == 0: print "No TFs to test on microarray %s" % tfgroup["train_fold"] continue if "test" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args) if "report" in args.steps: tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False) util.save_featuremaps(tedata, args.finaldir, args.reportdir) if "report" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], []) save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson") save_pbm_performance_table(args, all_tfids) if "chip" in args.steps: all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], []) save_chip_performance_table(args, all_tfids)