示例#1
0
    args.output_prefix + '.NO_HAPS_FOUND', args.output_prefix + '.log',
    args.output_prefix + '.human_readable.txt', args.output_prefix + '.vcf',
    args.output_prefix + '.cleaned.human_readable.txt',
    args.output_prefix + '.cleaned.vcf'
]

# Delete outputs left behind by a previous run so that stale files are not
# mistaken for results of this run.
for stale_path in past_files:
    if os.path.exists(stale_path):
        os.remove(stale_path)

# (1) read the mpileup and call variants
reader = sp.MPileUpReader(args.mpileup_filename)
recs = list(reader)
vc = VC.MPileUPVariant(recs,
                       min_cov=MIN_COVERAGE,
                       err_sub=ERR_SUB,
                       expected_strand=args.strand,
                       pval_cutoff=args.pval_cutoff)
vc.call_variant()
print(vc.variant)

if len(vc.variant) == 0:
    # Create the empty marker file directly instead of shelling out to
    # `touch`: the prefix is user-supplied and was interpolated unquoted, so
    # a path containing spaces or shell metacharacters produced the wrong
    # file (or ran an unintended command).
    no_snps_marker = "{out}.NO_SNPS_FOUND".format(out=args.output_prefix)
    with open(no_snps_marker, 'a'):
        os.utime(no_snps_marker, None)
    print("No SNPs found. END.", file=sys.stderr)
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(args.sam_filename,
                 args.fastx_filename,
                 args.output_prefix,
示例#2
0
def main(args, parser):
    """Call and phase variants for each pileup region, writing under ``args.output``.

    Steps, in order:

    * Delete output files of a previous run that share the same prefix.
    * Create ``<output>.human_readable_by_pos.txt`` and
      ``<output>.human_readable_by_hap.txt`` with their header lines.
    * For every ``(mpileupFile, contig, start, end)`` yielded by
      ``elitePileups``: call variants, skip the region if none are found,
      otherwise append the SNPs to ``<output>.snps``, phase the reads and
      write the haplotypes to the two human-readable files. The region's
      pileup file is deleted once it has been processed.
    * If no region produced any variant, create ``<output>.NO_SNPS_FOUND``
      and report that on stderr.

    Args:
        args: Ignored on entry; it is immediately replaced by the result of
            ``parser.parse_args()``.
        parser: Object whose ``parse_args()`` returns a namespace providing
            ``bhFDR``, ``output``, ``bamfile``, ``genes``, ``assembly`` and
            ``pval_cutoff``.
    """
    args = parser.parse_args()

    if args.bhFDR is not None:
        print(
            "--bhFDR {0} is given! Will be using Benjamini–Hochberg correction insteaad. --pval_cutoff is ignored."
            .format(args.bhFDR))

    # remove potential past run output
    past_files = [
        args.output + '.NO_SNPS_FOUND', args.output + '.NO_HAPS_FOUND',
        args.output + '.snps', args.output + '.log',
        args.output + '.human_readable.txt', args.output + '.vcf',
        args.output + '.cleaned.human_readable.txt',
        args.output + '.cleaned.vcf'
    ]

    for stale_path in past_files:
        if os.path.exists(stale_path):
            os.remove(stale_path)

    snpsfound = False
    # (0) generate pileups
    # Both report files are held in a `with` so they are closed even if
    # variant calling or phasing raises part-way through the loop.
    with open(args.output + '.human_readable_by_pos.txt', 'w') as f_human1, \
            open(args.output + '.human_readable_by_hap.txt', 'w') as f_human2:
        f_human1.write("haplotype\thapIdx\tcontig\tpos\tvarIdx\tbase\tcount\n")
        f_human2.write("haplotype\thapIdx\tcontig\tcount\n")

        for mpileupFile, contig, start, end in elitePileups(
                args.bamfile, args.genes, args.assembly, args.output):
            # (1) read the mpileup and call variants
            reader = sam.MPileUpReader(mpileupFile)
            recs = list(reader)
            vc = VC.MagMPileUPVariant(recs,
                                      min_cov=MIN_COVERAGE,
                                      err_sub=ERR_SUB,
                                      expected_strand='+-',
                                      pval_cutoff=args.pval_cutoff,
                                      bhFDR=args.bhFDR)
            vc.call_variant()
            print(vc.variant)

            if len(vc.variant) == 0:
                # Nothing to phase here; still drop the per-region pileup so
                # variant-free regions do not leave files behind.
                os.remove(mpileupFile)
                continue
            snpsfound = True

            # we write SNPs with the bases separated by "/" not "|" because
            # we haven't phased them yet
            with open(args.output + '.snps', 'a+') as f_snp:
                for pos, v in vc.variant.items():
                    # pos is written as pos + 1 (presumably converting a
                    # 0-based index to 1-based -- confirm against VC).
                    f_snp.write("{contig}\t{pos}\t{bases}\t{counts}\n".format(
                        contig=contig,
                        pos=pos + 1,
                        bases="/".join(b for (b, c) in v),
                        counts="/".join(str(c) for (b, c) in v)))

            # (2) for each CCS read, assign a haplotype (or discard if outlier)
            pp = VariantPhaser.MagVariantPhaser(vc)
            pp.phase_variant(args.bamfile, [contig, start, end],
                             args.output,
                             partial_ok=True)
            print(pp.haplotypes)
            pp.haplotypes.get_haplotype_vcf_assignment()
            pp.haplotypes.write_haplotype_to_humanreadable(contig, f_human1,
                                                           f_human2,
                                                           pp.seq_hap_info)
            os.remove(mpileupFile)

    if not snpsfound:
        # Create the marker file directly rather than via `touch` in a shell:
        # the prefix was interpolated unquoted into the command line.
        no_snps_marker = "{out}.NO_SNPS_FOUND".format(out=args.output)
        with open(no_snps_marker, 'a'):
            os.utime(no_snps_marker, None)
        # This file was already deleted with the other past-run outputs above
        # and nothing in this function recreates it, so an unconditional
        # remove raised FileNotFoundError before the message below was shown.
        # NOTE(review): the intent may have been to delete the header-only
        # *_by_pos.txt / *_by_hap.txt files instead -- confirm.
        try:
            os.remove(args.output + '.human_readable.txt')
        except FileNotFoundError:
            pass
        print("No SNPs found. END.", file=sys.stderr)