def _write_variant_list(path, variants):
    """Write every item of *variants* to the file *path*, one per line."""
    with open(path, "w") as outfile:
        outfile.writelines("%s\n" % variant for variant in variants)


def main(argv=None):
    """Script main.

    Parses command line options in sys.argv, unless *argv* is given, and
    runs the task selected with ``--task``:

    * ``mafs`` - calls ``gwas.countByVariantAllele`` on the ped/map files
      and writes the result to ``options.stdout`` as tab-separated text.
    * ``penetrance`` - calls ``gwas.calcPenetrance``; the penetrance table
      goes to ``options.stdout`` and the summary table is written to
      ``penetrance_summary.txt`` in the current working directory.
    * ``allele_diff`` - calls ``gwas.calcMaxAlleleFreqDiff`` and writes the
      result to ``options.stdout``.
    * ``detect_duplicates`` - calls ``gwas.findDuplicateVariants`` on the
      last element of *argv* and writes the three returned collections to
      ``<outfile-pattern>.triallelic``, ``.duplicates`` and
      ``.overlapping``.
    """

    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-t", "--test", dest="test", type="string",
                      help="supply help")

    parser.add_option("--task", dest="task", type="choice",
                      choices=["mafs", "penetrance",
                               "detect_duplicates", "allele_diff"],
                      help="task to perform")

    parser.add_option("--ped-file", dest="ped_file", type="string",
                      help="plink format .ped file")

    parser.add_option("--map-file", dest="map_file", type="string",
                      help="plink format .map file")

    parser.add_option("--freq-file", dest="mafs", type="string",
                      help="text file containing populations minor "
                      "allele frequencies of variants. One row per "
                      "variant with ID MAF")

    parser.add_option("--groups-file", dest="group_file", type="string",
                      help="file containing group labels for individuals "
                      "in the provided ped file")

    parser.add_option("--ref-label", dest="ref_label", type="string",
                      help="group label to be used as the reference case")

    parser.add_option("--test-label", dest="test_label", type="string",
                      help="group label to be used as the test case")

    parser.add_option("--subset", dest="subset", type="choice",
                      choices=["cases", "gender"], help="subset the "
                      "data by either case/control or gender")

    parser.add_option("--take-last", dest="take", action="store_true",
                      help="if use duplicates will take the last variant, "
                      "default behaviour is to take the first")

    parser.add_option("--outfile-pattern", dest="out_pattern", type="string",
                      help="outfile pattern to use for finding duplicates "
                      "and triallelic variants")

    parser.add_option("--snp-set", dest="snp_subset", type="string",
                      help="list of SNPs to include")

    # Defaults must be registered before the command line is parsed or they
    # never reach `options`.  The key has to be the option's `dest`
    # ("take"), not the flag name.
    parser.set_defaults(mafs=None, subset=None, take=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.task == "mafs":
        mafs = gwas.countByVariantAllele(options.ped_file,
                                         options.map_file)

        # `index_col` is a read_csv keyword, not a to_csv one, so it is
        # not passed here.
        mafs.to_csv(options.stdout, sep="\t")

    elif options.task == "penetrance":
        summary, pens = gwas.calcPenetrance(options.ped_file,
                                            options.map_file,
                                            subset=options.subset,
                                            mafs=options.mafs,
                                            snpset=options.snp_subset)

        pens.to_csv(options.stdout, sep="\t", index_label="SNP")
        # the summary always lands in the current working directory
        summary.to_csv(os.path.join(os.getcwd(), "penetrance_summary.txt"),
                       sep="\t", index_label="SNP")

    elif options.task == "allele_diff":
        allele_diffs = gwas.calcMaxAlleleFreqDiff(
            ped_file=options.ped_file,
            map_file=options.map_file,
            group_file=options.group_file,
            test=options.test_label,
            ref=options.ref_label)

        allele_diffs.to_csv(options.stdout, sep="\t")

    elif options.task == "detect_duplicates":
        # find variants with duplicated position and shared reference
        # allele indicative of triallelic variants - also same ID
        # output to a filter list
        # NOTE(review): the input file is taken as the last raw argv
        # element, not from the parsed positional `args` - confirm intent.
        infile = argv[-1]
        dups, tris, oves = gwas.findDuplicateVariants(
            bim_file=infile,
            take_last=options.take)

        # an absolute pattern is used verbatim, a relative one is resolved
        # against the current working directory
        if os.path.isabs(options.out_pattern):
            outpattern = options.out_pattern
        else:
            outpattern = os.path.abspath(options.out_pattern)

        _write_variant_list(outpattern + ".triallelic", tris)
        _write_variant_list(outpattern + ".duplicates", dups)
        _write_variant_list(outpattern + ".overlapping", oves)

    # write footer and output benchmark information.
    E.Stop()