示例#1
0
def main():
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    trdata = None
    tedata = None
    tfids = load_tfids(args)

    for tfid in tfids:
        if "calib" in args.steps:
            print "-------------- calib:", tfid, "--------------"
            trdata = load_traindata(tfid, args)
            util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib)

        if "train" in args.steps:
            print "-------------- train:", tfid, "--------------"
            trdata = load_traindata(tfid, args)
            util.train(models, trdata, args.calibdir, args.finaldir, nfold=1, ntrial=args.ntrial)

    if "test" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_metrics(tedata, "test", args.finaldir)

    if "report" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_featuremaps(tedata, args.finaldir, args.reportdir, maxrows=100000)
        util.save_report(args.finaldir, args.reportdir, tfids)
示例#2
0
def main():
    util.enable_reversecomplement()

    args   = loadargs()
    models = loadmodels(args)
    tfgroups = load_tfgroups(args)
    util.globals.flags.push("normalize_targets", True)

    for tfgroup in tfgroups:
        trdata = None
        if len(tfgroup["ids"]) == 0:
            print "No TFs to train on microarray %s"%tfgroup["train_fold"]
            continue

        if "calib" in args.steps:
            trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True)
            util.calibrate(models, trdata, args.calibdir, nfold=args.nfold, ncalib=args.ncalib, allfolds=True)

        if "train" in args.steps:
            trdata = load_pbmdata(trdata, tfgroup["ids"], tfgroup["train_fold"], args, remove_probe_bias=True)
            util.train(models, trdata, args.calibdir, args.finaldir, nfold=1,          ntrial=args.ntrial, metric_key="pearson.r")

    for tfgroup in tfgroups:
        tedata = None

        newids = []
        for id in tfgroup["ids"]:
            if os.path.exists(args.outdir+"/final/"+id+"/model.pkl"):
                newids.append(id)
            else:
                print "WARNING: did not find model for %s, skipping" % id
        tfgroup["ids"] = newids

        if len(tfgroup["ids"]) == 0:
            print "No TFs to test on microarray %s"%tfgroup["train_fold"]
            continue

        if "test" in args.steps:
            tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False)
            save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"], args)

        if "report" in args.steps:
            tedata = load_pbmdata(tedata, tfgroup["ids"], tfgroup["test_fold"], args, remove_probe_bias=False)
            util.save_featuremaps(tedata, args.finaldir, args.reportdir)

    if "report" in args.steps:
        all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[])
        save_report(args.finaldir, args.reportdir, all_tfids, index_metric="pearson")
        save_pbm_performance_table(args, all_tfids)

    if "chip" in args.steps:
        all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups],[])
        save_chip_performance_table(args, all_tfids)
示例#3
0
def save_pfms(args):
    maxrows = 10000 if args.quick else None
    chunktargets, chunkcols = get_chunktargets(args)

    print "Loading PBM data...",
    if args.mode == "A":
        testfold = "B"
    elif args.mode == "B":
        testfold = "A"
    else:
        testfold = "AB"
    pbmdata = util.datasource.fromtxt("../data/rnac/sequences.tsv.gz",
                                      None,
                                      "../data/rnac/targets.tsv.gz",
                                      targetcols=chunkcols,
                                      foldfilter=testfold,
                                      maxrows=maxrows)
    print "done"

    util.save_featuremaps(pbmdata, args.finaldir, args.reportdir)
示例#4
0
def main():
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    trdata = None
    tedata = None
    tfids = load_tfids(args)

    for tfid in tfids:

        if "calib" in args.steps:
            print "-------------- calib:", tfid, "--------------"
            set_motif_lengths(args, models, tfid)
            trdata = load_traindata(tfid, args)
            util.calibrate(models,
                           trdata,
                           args.calibdir,
                           nfold=args.nfold,
                           ncalib=args.ncalib,
                           allfolds=False)

        if "train" in args.steps:
            print "-------------- train:", tfid, "--------------"
            set_motif_lengths(args, models, tfid)
            trdata = load_traindata(tfid, args)
            util.train(models,
                       trdata,
                       args.calibdir,
                       args.finaldir,
                       nfold=1,
                       ntrial=args.ntrial)

    if "test" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_metrics(tedata, "test", args.finaldir)

    if "report" in args.steps:
        tedata = load_testdata(tedata, tfids, args)
        util.save_featuremaps(tedata, args.finaldir, args.reportdir)
        util.save_report(args.finaldir, args.reportdir, tfids)
示例#5
0
def main():
    util.enable_reversecomplement()

    args = loadargs()
    models = loadmodels(args)
    tfgroups = load_tfgroups(args)
    util.globals.flags.push("normalize_targets", True)

    for tfgroup in tfgroups:
        trdata = None
        if len(tfgroup["ids"]) == 0:
            print "No TFs to train on microarray %s" % tfgroup["train_fold"]
            continue

        if "calib" in args.steps:
            trdata = load_pbmdata(trdata,
                                  tfgroup["ids"],
                                  tfgroup["train_fold"],
                                  args,
                                  remove_probe_bias=True)
            util.calibrate(models,
                           trdata,
                           args.calibdir,
                           nfold=args.nfold,
                           ncalib=args.ncalib,
                           allfolds=True)

        if "train" in args.steps:
            trdata = load_pbmdata(trdata,
                                  tfgroup["ids"],
                                  tfgroup["train_fold"],
                                  args,
                                  remove_probe_bias=True)
            util.train(models,
                       trdata,
                       args.calibdir,
                       args.finaldir,
                       nfold=1,
                       ntrial=args.ntrial,
                       metric_key="pearson.r")

    for tfgroup in tfgroups:
        tedata = None

        newids = []
        for id in tfgroup["ids"]:
            if os.path.exists(args.outdir + "/final/" + id + "/model.pkl"):
                newids.append(id)
            else:
                print "WARNING: did not find model for %s, skipping" % id
        tfgroup["ids"] = newids

        if len(tfgroup["ids"]) == 0:
            print "No TFs to test on microarray %s" % tfgroup["train_fold"]
            continue

        if "test" in args.steps:
            tedata = load_pbmdata(tedata,
                                  tfgroup["ids"],
                                  tfgroup["test_fold"],
                                  args,
                                  remove_probe_bias=False)
            save_test_performance(tedata, tfgroup["ids"], tfgroup["test_fold"],
                                  args)

        if "report" in args.steps:
            tedata = load_pbmdata(tedata,
                                  tfgroup["ids"],
                                  tfgroup["test_fold"],
                                  args,
                                  remove_probe_bias=False)
            util.save_featuremaps(tedata, args.finaldir, args.reportdir)

    if "report" in args.steps:
        all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], [])
        save_report(args.finaldir,
                    args.reportdir,
                    all_tfids,
                    index_metric="pearson")
        save_pbm_performance_table(args, all_tfids)

    if "chip" in args.steps:
        all_tfids = sum([tfgroup["ids"] for tfgroup in tfgroups], [])
        save_chip_performance_table(args, all_tfids)