def main():
    """Rewrite depth-unsupported DEL/DUP calls in the case sample as BND.

    Command-line entry point. Every record of the input VCF is copied to the
    output. A record is first rewritten (``SVTYPE`` set to ``BND`` and ALT set
    to ``<BND>``) when all of the following hold:

    * its ``SVTYPE`` is ``DEL`` or ``DUP`` and its ``SVLEN`` is at least
      ``min_size``;
    * the case sample has no depth support, as judged by
      ``has_depth_support_allosome`` for contigs listed in the allosome
      contigs file and by ``has_depth_support_autosome`` otherwise;
    * ``has_sr_or_pe_support`` reports support for the case sample.

    For allosome contigs the depth check is given the fam-file samples whose
    sex code ('1' or '2') matches the case sample's.

    The VCF is read from stdin when ``vcf`` is ``-`` or ``stdin`` and written
    to stdout when ``--outfile`` is omitted.

    Raises:
        KeyError: if the case sample is absent from the fam file, or if an
            allosome record is encountered and the case sample's sex code is
            neither '1' nor '2'.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('vcf', help='Module 04 genotyped PESR vcf')
    parser.add_argument('allosome_contigs_file')
    parser.add_argument('famfile', type=argparse.FileType('r'))
    parser.add_argument('case_sample')
    parser.add_argument('min_size',
                        help='minimum size at which to apply conversions',
                        type=int)
    parser.add_argument('-o', '--outfile',
                        help='Output file [default: stdout]')
    args = parser.parse_args()

    case_sample = args.case_sample
    min_size = args.min_size

    vcf_source = sys.stdin if args.vcf in ('-', 'stdin') else args.vcf
    out_target = sys.stdout if args.outfile is None else args.outfile

    # Context managers guarantee both VCF handles are closed (and the output
    # flushed) even if processing fails part-way through.
    with pysam.VariantFile(vcf_source) as vcf:
        with pysam.VariantFile(out_target, 'w', header=vcf.header) as fout:
            allosome_contigs = read_contigs_list(args.allosome_contigs_file)
            fam = parse_famfile(args.famfile)
            case_sample_sex = fam.samples[case_sample].sex
            samples_by_sex = {
                '1': [s for s in fam.samples if fam.samples[s].sex == '1'],
                '2': [s for s in fam.samples if fam.samples[s].sex == '2'],
            }

            for record in vcf:
                svtype = record.info['SVTYPE']
                if (svtype in ('DEL', 'DUP')
                        and record.info['SVLEN'] >= min_size):
                    if record.contig in allosome_contigs:
                        # Looked up lazily so an unexpected sex code only
                        # matters when an allosome record is actually seen.
                        depth_supported = has_depth_support_allosome(
                            record, case_sample,
                            samples_by_sex[case_sample_sex])
                    else:
                        depth_supported = has_depth_support_autosome(
                            record, case_sample)

                    # SR/PE evidence is only consulted when depth evidence
                    # is missing.
                    if (not depth_supported
                            and has_sr_or_pe_support(record, case_sample)):
                        record.info['SVTYPE'] = 'BND'
                        record.alts = ['<BND>']

                fout.write(record)
def main():
    """Command-line entry point: scrape statistics from a VCF.

    Positional arguments are the input VCF path, a fam file (opened for
    reading) and two output files (opened for writing), ``var_fout`` and
    ``obs_fout``. The opened VCF, the parsed fam file and both output handles
    are handed to ``StatsScraper``, whose ``scrape`` method does the work.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('vcf')
    cli.add_argument('famfile', type=argparse.FileType('r'))
    # Both outputs are plain writable file arguments, registered in order.
    for output_name in ('var_fout', 'obs_fout'):
        cli.add_argument(output_name, type=argparse.FileType('w'))
    opts = cli.parse_args()

    variants = pysam.VariantFile(opts.vcf)
    pedigree = parse_famfile(opts.famfile)

    StatsScraper(variants, pedigree, opts.var_fout, opts.obs_fout).scrape()
def main():
    """Command-line entry point: filter a VCF using de novo filter results.

    Positional arguments are the input VCF path, the de novo filter results
    file (opened for reading), a fam file (opened for reading) and the output
    VCF path. Passing ``stdout`` or ``-`` as the output writes to standard
    output. The output VCF reuses the input VCF's header, and the actual
    filtering is delegated to ``filter_dn_variants``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('vcf')
    parser.add_argument('filtered', type=argparse.FileType('r'),
                        help='De novo filter results')
    parser.add_argument('famfile', type=argparse.FileType('r'))
    parser.add_argument('fout', help='Filtered VCF')
    args = parser.parse_args()

    # Context managers close both VCF handles (flushing the output) even if
    # filtering raises part-way through.
    with pysam.VariantFile(args.vcf) as vcf:
        fam = parse_famfile(args.famfile)

        out_target = sys.stdout if args.fout in ('stdout', '-') else args.fout
        with pysam.VariantFile(out_target, 'w', header=vcf.header) as fout:
            filter_dn_variants(vcf, args.filtered, fam, fout)
def main():
    """Command-line entry point: apply de novo cutoffs to a metrics table.

    Positional arguments are a metrics file (opened for reading), the path to
    a cutoffs table, a fam file (opened for reading) and an output file
    (opened for writing). The cutoffs table is loaded with ``pd.read_table``
    and passed through ``format_cutoffs``. Each result yielded by
    ``filter_denovo`` is written as one tab-separated line holding its name,
    family, samples and support fields.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('metrics', type=argparse.FileType('r'))
    cli.add_argument('cutoffs')
    cli.add_argument('famfile', type=argparse.FileType('r'))
    cli.add_argument('fout', type=argparse.FileType('w'))
    opts = cli.parse_args()

    pedigree = parse_famfile(opts.famfile)
    parsed_metrics = metric_parser(opts.metrics, pedigree)
    formatted_cutoffs = format_cutoffs(pd.read_table(opts.cutoffs))

    row_template = '{0}\t{1}\t{2}\t{3}\n'
    emit = opts.fout.write
    for result in filter_denovo(parsed_metrics, pedigree, formatted_cutoffs):
        # Each result must unpack into exactly these four fields.
        name, family, samples, support = result
        emit(row_template.format(name, family, samples, support))
def main():
    """Command-line entry point: run PE and SR de novo tests over a VCF.

    Opens the input VCF, parses the fam file, and opens the required
    ``--countfile`` and ``--discfile`` as Tabix files (each with an optional
    explicit index path). A tab-separated header line is written to each of
    the ``petest`` and ``srtest`` outputs before ``DenovoTestRunner`` is
    constructed and run with the ``--background`` and ``--max-parents``
    settings.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('vcf')
    cli.add_argument('famfile', type=argparse.FileType('r'))
    cli.add_argument('-c', '--countfile', required=True)
    cli.add_argument('-d', '--discfile', required=True)
    cli.add_argument('--discfile-index')
    cli.add_argument('--countfile-index')
    cli.add_argument('--background', type=int, default=160)
    cli.add_argument('--max-parents', type=float, default=10)
    cli.add_argument('petest', type=argparse.FileType('w'), help='fout')
    cli.add_argument('srtest', type=argparse.FileType('w'), help='fout')
    opts = cli.parse_args()

    def open_tabix(path, index_path):
        """Open ``path`` as a tuple-parsed Tabix file.

        The ``index`` keyword is only forwarded when an index path was given.
        """
        extra = {} if index_path is None else {'index': index_path}
        return pysam.TabixFile(path, parser=pysam.asTuple(), **extra)

    variants = pysam.VariantFile(opts.vcf)
    pedigree = parse_famfile(opts.famfile)

    disc_tabix = open_tabix(opts.discfile, opts.discfile_index)
    count_tabix = open_tabix(opts.countfile, opts.countfile_index)

    # Each output table starts with its own column header line.
    pe_columns = 'name sample log_pval called_median bg_median'.split()
    opts.petest.write('\t'.join(pe_columns) + '\n')

    sr_columns = 'name sample coord pos log_pval called_median bg_median'.split()
    opts.srtest.write('\t'.join(sr_columns) + '\n')

    DenovoTestRunner(variants, pedigree, count_tabix, disc_tabix,
                     opts.petest, opts.srtest,
                     opts.background, opts.max_parents).run()
def main():
    """Command-line entry point: write the records kept by the de novo filter.

    Positional arguments are the input VCF, a fam file (opened for reading)
    and the output VCF. The input is read from stdin when ``vcf`` is ``-`` or
    ``stdin``; the output goes to stdout when ``fout`` is ``-`` or ``stdout``.
    The output reuses the input header. Every record yielded by
    ``filter_denovo_records`` (called with the ``--max-parents`` value,
    default 10) is written to the output.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('vcf')
    parser.add_argument('famfile', type=argparse.FileType('r'))
    parser.add_argument('fout')
    parser.add_argument('--max-parents', type=int, default=10)
    args = parser.parse_args()

    vcf_source = sys.stdin if args.vcf in ('-', 'stdin') else args.vcf
    out_target = sys.stdout if args.fout in ('-', 'stdout') else args.fout

    # Context managers close both VCF handles (flushing the output) even if
    # filtering raises part-way through.
    with pysam.VariantFile(vcf_source) as vcf:
        with pysam.VariantFile(out_target, 'w', header=vcf.header) as fout:
            fam = parse_famfile(args.famfile)

            for record in filter_denovo_records(vcf, fam, args.max_parents):
                fout.write(record)