Python PipelineGWAS.GWASResults示例

def main(argv=None):
    """Plot GWAS association results from the command line.

    Parses command line options in sys.argv, unless *argv* is given.

    The last element of *argv* is taken as the association results file;
    a comma-separated value is treated as a list of files.  Depending on
    ``--plot-type`` a Manhattan, QQ or epistasis plot is generated via
    ``gwas.GWASResults``.  Whatever ``plotManhattan`` returns is written
    as tab-separated text to ``options.stdout``.

    Raises
    ------
    IOError
        If the last command line argument contains no file name.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t",
                      "--test",
                      dest="test",
                      type="string",
                      help="supply help")

    parser.add_option("--plot-type",
                      dest="plot_type",
                      type="choice",
                      choices=["manhattan", "qqplot", "epistasis"],
                      help="plot type to generate")

    parser.add_option("--resolution",
                      dest="resolution",
                      type="choice",
                      choices=["genome_wide", "chromosome", "fine_map"],
                      help="the resolution of plotting, wether the plot "
                      "depicts the whole genome, a single chromosome or "
                      "a specific locus")

    parser.add_option("--file-format",
                      dest="file_format",
                      type="choice",
                      choices=["plink", "cassi", "cassi_covar"],
                      help="input file format, used to parse the file "
                      "properly")

    parser.add_option("--save-path",
                      dest="save_path",
                      type="string",
                      help="path and filename to save image to")

    # Defaults have to be registered BEFORE the command line is parsed:
    # the options object is populated at parse time, so defaults set
    # afterwards never reach it.
    parser.set_defaults(resolution="genome_wide",
                        plot_type="manhattan",
                        file_format="plink")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # The input is the last command line argument; a comma-separated
    # value is a list of files.  Empty entries (e.g. from a trailing
    # comma) are discarded so that a missing input is actually detected.
    infile = argv[-1]
    infiles = [fname for fname in infile.split(",") if fname]
    if not infiles:
        raise IOError("no input files detected, please specifiy association "
                      "results files as the last command line argument")

    # GWASResults is handed a list for several files, a plain string
    # for a single one
    assoc_file = infiles if len(infiles) > 1 else infiles[0]

    # need to parse epistasis output slightly differently
    epi = options.plot_type == "epistasis"

    results = gwas.GWASResults(assoc_file=assoc_file,
                               epistasis=epi,
                               file_format=options.file_format)

    # only the Manhattan plot yields results to write out
    df = None
    if options.plot_type == "manhattan":
        df = results.plotManhattan(resolution=options.resolution,
                                   save_path=options.save_path)
    elif options.plot_type == "qqplot":
        results.plotQQ(save_path=options.save_path,
                       resolution=options.resolution)
    elif options.plot_type == "epistasis":
        results.plotEpistasis(save_path=options.save_path,
                              resolution=options.resolution)

    # only output appended results for Manhattan plot, not qqplot
    if df is not None:
        df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()

示例#2

显示文件

文件： assoc2assoc.py 项目： kathrinjansen/cgat

def _top_hit(region):
    """Return ``(BP, SNP)`` of the strongest association in *region*.

    The ranking column is the first one present out of ``CHISQ``,
    ``STAT`` and ``T``.  ``STAT`` and ``T`` are ranked on their absolute
    value; ``CHISQ`` is ranked as is.  *region* itself is not modified,
    so the values written out later keep their sign.

    Raises
    ------
    KeyError
        If none of the ranking columns is present in *region*.
    """
    for column, use_abs in (("CHISQ", False), ("STAT", True), ("T", True)):
        if column not in region.columns:
            continue
        scores = region[column].abs() if use_abs else region[column]
        # rank on row positions rather than index labels, so the lookup
        # is unaffected by whatever index the region carries; missing
        # values sort last
        order = scores.reset_index(drop=True).sort_values(ascending=False)
        top = region.iloc[order.index[0]]
        return top["BP"], top["SNP"]

    raise KeyError("none of the columns CHISQ, STAT or T found in "
                   "association results")


def main(argv=None):
    """Post-process GWAS association results from the command line.

    Parses command line options in sys.argv, unless *argv* is given.

    The last element of *argv* is taken as the association results file;
    a comma-separated value is treated as a list of files.  The work done
    is selected with ``--task``:

    ``get_hits``
        Call ``results.getHits`` with ``--p-threshold`` and write each
        returned region to
        ``<outdir>/chr<name>_<BP>_<SNP>_significant.tsv``, where BP and
        SNP are taken from the top-ranked row of the region.
    ``extract_results``
        Read SNP identifiers, one per line, from ``--snp-set``, extract
        them with ``results.extractSNPs``, drop all-empty rows and
        duplicated SNPs, and write the table to ``options.stdout``.
    ``merge_freq``
        Merge the results with the ``.frq`` files in
        ``--frequency-directory`` via ``results.mergeFrequencyResults``
        and write the table to ``options.stdout``.

    Raises
    ------
    IOError
        If the last command line argument contains no file name.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("--task", dest="task", type="choice",
                      choices=["get_hits", "extract_results",
                               "merge_freq"],
                      help="task to perform")

    parser.add_option("--p-threshold", dest="p_threshold", type="float",
                      help="threshold for association p-value, below "
                      "which results will be output")

    parser.add_option("--output-directory", dest="outdir", type="string",
                      help="output file directory")

    parser.add_option("--snp-set", dest="snpset", type="string",
                      help="file containing list of SNP per row to "
                      "extract from GWAS results")

    parser.add_option("--frequency-directory", dest="freq_dir", type="string",
                      help="Directory containing plink .frq files corresponding"
                      " to all chromosomes")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # The input is the last command line argument; a comma-separated
    # value is a list of files.  Empty entries (e.g. from a trailing
    # comma) are discarded so that a missing input is actually detected.
    infile = argv[-1]
    infiles = [fname for fname in infile.split(",") if fname]
    if not infiles:
        raise IOError("no input files detected, please specifiy association "
                      "results files as the last command line argument")

    # GWASResults is handed a list for several files, a plain string
    # for a single one
    assoc_file = infiles if len(infiles) > 1 else infiles[0]
    results = gwas.GWASResults(assoc_file=assoc_file)

    if options.task == "get_hits":
        hits = results.getHits(float(options.p_threshold))
        for name, region in hits:
            top_bp, top_snp = _top_hit(region)

            outname = "_".join(["chr%s" % str(name),
                                str(top_bp),
                                top_snp,
                                "significant"])

            outfile = outname + ".tsv"
            out_file = "/".join([options.outdir, outfile])
            E.info("output association results from Chr%s to %s" %
                   (str(name), out_file))

            # The leading column tends to come through as a stray
            # "Unnamed: 0" column: drop the first column unless it is
            # the expected "A1" column.
            if len(region.columns) > 0 and region.columns[0] != "A1":
                region = region.drop(region.columns[0], axis=1)

            region.to_csv(out_file, sep="\t", index=None)

    elif options.task == "extract_results":
        with IOTools.openFile(options.snpset, "r") as sfile:
            snpset = [snp.rstrip("\n") for snp in sfile.readlines()]

        snp_df = results.extractSNPs(snpset)
        snp_df.dropna(axis=0, how='all', inplace=True)
        snp_df.drop_duplicates(subset=["SNP"], inplace=True)
        snp_df.to_csv(options.stdout, sep="\t", index=None)

    elif options.task == "merge_freq":
        # sequentially merge GWAS result with frequency data
        # to make file for GCTA joint analysis
        # raw string with an escaped dot: match a literal ".frq" suffix
        regex = re.compile(r"(\S+)\.frq$")
        cojo_df = results.mergeFrequencyResults(options.freq_dir,
                                                file_regex=regex)
        cojo_df.to_csv(options.stdout, sep="\t", index=None)

    # write footer and output benchmark information.
    E.Stop()