Пример #1
0
def main():
    """Convert weakly supported DEL/DUP calls in a VCF to BND records.

    Command-line arguments:
        vcf: input VCF path, or ``-``/``stdin`` to read standard input.
        allosome_contigs_file: passed to ``read_contigs_list``; records on
            the contigs it returns use the allosome depth check.
        famfile: pedigree file, parsed with ``parse_famfile``.
        case_sample: ID of the sample whose evidence is inspected.
        min_size: only DEL/DUP records with ``SVLEN >= min_size`` are
            candidates for conversion.
        -o/--outfile: output VCF path (standard output when omitted).

    Every input record is written to the output. A candidate record is
    rewritten with ``SVTYPE=BND`` and ALT ``<BND>`` when the depth-support
    helper returns false and ``has_sr_or_pe_support`` returns true for the
    case sample.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('vcf', help='Module 04 genotyped PESR vcf')
    parser.add_argument('allosome_contigs_file')
    parser.add_argument('famfile', type=argparse.FileType('r'))
    parser.add_argument('case_sample')
    parser.add_argument('min_size',
                        help='minimum size at which to apply conversions',
                        type=int)
    parser.add_argument('-o',
                        '--outfile',
                        help='Output file [default: stdout]')

    args = parser.parse_args()

    case_sample = args.case_sample
    min_size = args.min_size

    vcf_source = sys.stdin if args.vcf in ('-', 'stdin') else args.vcf
    out_target = sys.stdout if args.outfile is None else args.outfile

    # Context managers guarantee both VCF handles are closed (and the output
    # flushed) even if a record fails part-way through.
    with pysam.VariantFile(vcf_source) as vcf:
        with pysam.VariantFile(out_target, 'w', header=vcf.header) as fout:
            allosome_contigs = read_contigs_list(args.allosome_contigs_file)

            fam = parse_famfile(args.famfile)
            case_sample_sex = fam.samples[case_sample].sex
            samples_by_sex = {
                '1': [s for s in fam.samples if fam.samples[s].sex == '1'],
                '2': [s for s in fam.samples if fam.samples[s].sex == '2']
            }

            for record in vcf:
                svtype = record.info['SVTYPE']
                if (svtype in ('DEL', 'DUP')
                        and record.info['SVLEN'] >= min_size):
                    if record.contig in allosome_contigs:
                        # The same-sex sample list is looked up lazily so a
                        # case sample with another sex code only fails if an
                        # allosome candidate is actually encountered.
                        depth_supported = has_depth_support_allosome(
                            record, case_sample,
                            samples_by_sex[case_sample_sex])
                    else:
                        depth_supported = has_depth_support_autosome(
                            record, case_sample)

                    if not depth_supported and has_sr_or_pe_support(
                            record, case_sample):
                        record.info['SVTYPE'] = 'BND'
                        record.alts = ['<BND>']
                fout.write(record)
Пример #2
0
def main():
    """Parse the command line and run a ``StatsScraper`` over a VCF.

    Positional arguments: the VCF path, a pedigree file (opened for reading
    and parsed with ``parse_famfile``), and two files opened for writing
    (``var_fout`` and ``obs_fout``) that are handed to the scraper.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('vcf')
    # The remaining positionals are all file handles; only the mode differs.
    for arg_name, mode in (('famfile', 'r'),
                           ('var_fout', 'w'),
                           ('obs_fout', 'w')):
        cli.add_argument(arg_name, type=argparse.FileType(mode))
    opts = cli.parse_args()

    variants = pysam.VariantFile(opts.vcf)
    pedigree = parse_famfile(opts.famfile)

    StatsScraper(variants, pedigree, opts.var_fout, opts.obs_fout).scrape()
Пример #3
0
def main():
    """Parse the command line and run ``filter_dn_variants``.

    Positional arguments: the input VCF path, the de novo filter results
    file, a pedigree file (parsed with ``parse_famfile``), and the output
    VCF path. Passing ``stdout`` or ``-`` as the output writes to standard
    output. The output VCF reuses the input VCF's header.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('vcf')
    cli.add_argument('filtered',
                     help='De novo filter results',
                     type=argparse.FileType('r'))
    cli.add_argument('famfile', type=argparse.FileType('r'))
    cli.add_argument('fout', help='Filtered VCF')
    opts = cli.parse_args()

    source_vcf = pysam.VariantFile(opts.vcf)
    pedigree = parse_famfile(opts.famfile)

    # Resolve the destination first, then open it as a VCF writer.
    if opts.fout in 'stdout -'.split():
        destination = sys.stdout
    else:
        destination = opts.fout
    writer = pysam.VariantFile(destination, 'w', header=source_vcf.header)

    filter_dn_variants(source_vcf, opts.filtered, pedigree, writer)
Пример #4
0
def main():
    """Parse the command line and write ``filter_denovo`` results as TSV.

    Positional arguments: a metrics file, a cutoffs table path (read with
    ``pd.read_table`` and passed through ``format_cutoffs``), a pedigree
    file (parsed with ``parse_famfile``), and the output file. Each result
    yielded by ``filter_denovo`` becomes one tab-separated line of
    name, family, samples and support.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('metrics', type=argparse.FileType('r'))
    cli.add_argument('cutoffs')
    cli.add_argument('famfile', type=argparse.FileType('r'))
    cli.add_argument('fout', type=argparse.FileType('w'))
    opts = cli.parse_args()

    pedigree = parse_famfile(opts.famfile)
    parsed_metrics = metric_parser(opts.metrics, pedigree)

    cutoff_table = format_cutoffs(pd.read_table(opts.cutoffs))

    row_template = '{0}\t{1}\t{2}\t{3}\n'
    opts.fout.writelines(
        row_template.format(name, family, samples, support)
        for name, family, samples, support in filter_denovo(
            parsed_metrics, pedigree, cutoff_table))
Пример #5
0
def main():
    """Parse the command line and run a ``DenovoTestRunner``.

    Positional arguments: the VCF path, a pedigree file (parsed with
    ``parse_famfile``), and the ``petest`` and ``srtest`` output files.
    ``-c/--countfile`` and ``-d/--discfile`` are required and opened as
    tabix files, optionally with explicit index paths. ``--background``
    and ``--max-parents`` are forwarded to the runner unchanged.

    A tab-separated header line is written to each output file before the
    runner starts.
    """

    def open_tabix(path, index_path):
        # Only pass ``index`` when the user supplied one.
        options = {'parser': pysam.asTuple()}
        if index_path is not None:
            options['index'] = index_path
        return pysam.TabixFile(path, **options)

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument('vcf')
    cli.add_argument('famfile', type=argparse.FileType('r'))
    cli.add_argument('-c', '--countfile', required=True)
    cli.add_argument('-d', '--discfile', required=True)
    cli.add_argument('--discfile-index')
    cli.add_argument('--countfile-index')
    cli.add_argument('--background', default=160, type=int)
    cli.add_argument('--max-parents', default=10, type=float)
    cli.add_argument('petest', help='fout', type=argparse.FileType('w'))
    cli.add_argument('srtest', help='fout', type=argparse.FileType('w'))
    opts = cli.parse_args()

    variants = pysam.VariantFile(opts.vcf)
    pedigree = parse_famfile(opts.famfile)

    disc_tabix = open_tabix(opts.discfile, opts.discfile_index)
    count_tabix = open_tabix(opts.countfile, opts.countfile_index)

    pe_columns = 'name sample log_pval called_median bg_median'.split()
    opts.petest.write('\t'.join(pe_columns) + '\n')

    sr_columns = (
        'name sample coord pos log_pval called_median bg_median'.split())
    opts.srtest.write('\t'.join(sr_columns) + '\n')

    DenovoTestRunner(variants, pedigree, count_tabix, disc_tabix,
                     opts.petest, opts.srtest, opts.background,
                     opts.max_parents).run()
Пример #6
0
def main():
    """Write the records yielded by ``filter_denovo_records`` to a VCF.

    Command-line arguments:
        vcf: input VCF path, or ``-``/``stdin`` to read standard input.
        famfile: pedigree file, parsed with ``parse_famfile``.
        fout: output VCF path, or ``-``/``stdout`` for standard output.
        --max-parents: integer forwarded to ``filter_denovo_records``
            (default 10).

    The output VCF reuses the input VCF's header.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('vcf')
    parser.add_argument('famfile', type=argparse.FileType('r'))
    parser.add_argument('fout')
    parser.add_argument('--max-parents', type=int, default=10)
    args = parser.parse_args()

    vcf_source = sys.stdin if args.vcf in ('-', 'stdin') else args.vcf
    out_target = sys.stdout if args.fout in ('-', 'stdout') else args.fout

    # Context managers guarantee both VCF handles are closed (and the output
    # flushed) even if filtering raises part-way through.
    with pysam.VariantFile(vcf_source) as vcf:
        with pysam.VariantFile(out_target, 'w', header=vcf.header) as fout:
            fam = parse_famfile(args.famfile)

            for record in filter_denovo_records(vcf, fam, args.max_parents):
                fout.write(record)