示例#1
0
# Read every record from the mpileup reader into a list.
recs = list(reader)
vc = VC.MPileUPVariant(recs,
                       min_cov=MIN_COVERAGE,
                       err_sub=ERR_SUB,
                       expected_strand=args.strand,
                       pval_cutoff=args.pval_cutoff)
vc.call_variant()
print(vc.variant)

if len(vc.variant) == 0:
    # Touch a marker file so the "no SNPs" outcome is visible on disk, then
    # exit with status 0.
    os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix))
    print("No SNPs found. END.", file=sys.stderr)
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(args.sam_filename,
                 args.fastx_filename,
                 args.output_prefix,
                 partial_ok=args.partial_ok)
pp.haplotypes.get_haplotype_vcf_assignment()

# (3) phase isoforms
# Parse inside a `with` block so the FASTA/FASTQ handle is closed as soon as
# the IDs have been collected, instead of being left open until garbage
# collection.
with open(args.fastx_filename) as fastx_handle:
    seqids = {
        r.id for r in SeqIO.parse(
            fastx_handle, VariantPhaser.type_fa_or_fq(args.fastx_filename))
    }
isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp)
if len(isoform_tally) == 0:
    os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
示例#2
0
vc.call_variant()
print vc.variant

if len(vc.variant) == 0:
    os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix))
    print >> sys.stderr, "No SNPs found. END."
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok)
pp.haplotypes
pp.haplotypes.get_haplotype_vcf_assignment()

# (3) phase isoforms
seqids = set([r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename))])
isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp)
if len(isoform_tally) == 0:
    os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
    print >> sys.stderr, "No good haps found. END."
    sys.exit(0)
pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix)

# (4) clean isoforms
hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally)

# (5) error correct haplotypes
#  if diploid, use exhaustive search
#  otherwise, use hap counts (ToDo: make this work with exhaustive search later)
variants = [ [base.upper() for base,count in vc.variant[pos]] for pos in pp.accepted_pos]
示例#3
0

# (1) read the mpileup and vall variants
reader = sp.MPileUpReader(args.mpileup_filename)
recs = [r for r in reader]
vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff)
vc.call_variant()
print vc.variant

if len(vc.variant) == 0:
    os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix))
    print >> sys.stderr, "No SNPs found. END."
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(args.sam_filename, args.fasta_filename, args.output_prefix, partial_ok=args.partial_ok)
pp.haplotypes
pp.haplotypes.get_haplotype_vcf_assignment()

# (3) phase isoforms
seqids = set([r.id for r in SeqIO.parse(open(args.fasta_filename), 'fasta')])
isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp)
if len(isoform_tally) == 0:
    os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
    print >> sys.stderr, "No good haps found. END."
    sys.exit(0)
pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix)

# (4) clean isoforms
hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally)
示例#4
0
        os.remove(file)

# (1) read the mpileup and call variants
reader = sp.MPileUpReader(args.mpileup_filename)
recs = list(reader)
vc = VC.MPileUPVariant(recs, min_cov=MIN_COVERAGE, err_sub=ERR_SUB, expected_strand=args.strand, pval_cutoff=args.pval_cutoff)
vc.call_variant()
print(vc.variant)

if len(vc.variant) == 0:
    # Touch a marker file so the "no SNPs" outcome is visible on disk, then
    # exit with status 0.
    os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix))
    print("No SNPs found. END.", file=sys.stderr)
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(args.sam_filename, args.fastx_filename, args.output_prefix, partial_ok=args.partial_ok)
pp.haplotypes.get_haplotype_vcf_assignment()

# (3) phase isoforms
# Parse inside a `with` block so the FASTA/FASTQ handle is closed once the
# read IDs have been collected instead of staying open until garbage collection.
with open(args.fastx_filename) as fastx_handle:
    fastx_format = VariantPhaser.type_fa_or_fq(args.fastx_filename)
    seqids = {r.id for r in SeqIO.parse(fastx_handle, fastx_format)}
isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp)
if len(isoform_tally) == 0:
    os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
    print("No good haps found. END.", file=sys.stderr)
    sys.exit(0)
pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally, args.output_prefix)

# (4) clean isoforms
hap_count = VariantPhaseCleaner.make_haplotype_counts(isoform_tally)
示例#5
0
def main(args, parser):
    """Call variants and phase reads for each pileup yielded by ``elitePileups``.

    All output paths are derived from ``args.output``:
    ``.human_readable_by_pos.txt`` and ``.human_readable_by_hap.txt`` are
    (re)created with a header row, ``.snps`` is appended to for every region
    that has variants, and a ``.NO_SNPS_FOUND`` marker is touched when no
    region had any.

    Args:
        args: Replaced immediately by ``parser.parse_args()``; the value
            passed in is never read.
        parser: Object whose ``parse_args()`` returns a namespace providing
            ``bhFDR``, ``output``, ``bamfile``, ``genes``, ``assembly`` and
            ``pval_cutoff``.
    """
    args = parser.parse_args()

    if args.bhFDR is not None:
        print(
            "--bhFDR {0} is given! Will be using Benjamini–Hochberg correction insteaad. --pval_cutoff is ignored."
            .format(args.bhFDR))

    # remove potential past run output
    past_files = [
        args.output + '.NO_SNPS_FOUND', args.output + '.NO_HAPS_FOUND',
        args.output + '.snps', args.output + '.log',
        args.output + '.human_readable.txt', args.output + '.vcf',
        args.output + '.cleaned.human_readable.txt',
        args.output + '.cleaned.vcf'
    ]

    for path in past_files:
        if os.path.exists(path):
            os.remove(path)

    snpsfound = False
    # (0) generate pileups
    # Both report files are managed by `with` so they are closed even if a
    # region fails part-way through.
    with open(args.output + '.human_readable_by_pos.txt', 'w') as f_human1, \
            open(args.output + '.human_readable_by_hap.txt', 'w') as f_human2:
        f_human1.write("haplotype\thapIdx\tcontig\tpos\tvarIdx\tbase\tcount\n")
        f_human2.write("haplotype\thapIdx\tcontig\tcount\n")

        for mpileupFile, contig, start, end in elitePileups(
                args.bamfile, args.genes, args.assembly, args.output):
            # (1) read the mpileup and call variants
            reader = sam.MPileUpReader(mpileupFile)
            recs = list(reader)
            vc = VC.MagMPileUPVariant(recs,
                                      min_cov=MIN_COVERAGE,
                                      err_sub=ERR_SUB,
                                      expected_strand='+-',
                                      pval_cutoff=args.pval_cutoff,
                                      bhFDR=args.bhFDR)
            vc.call_variant()
            print(vc.variant)

            if len(vc.variant) == 0:
                # NOTE(review): regions without variants skip the
                # os.remove(mpileupFile) below, so their pileup file stays on
                # disk -- confirm this is intended.
                continue
            snpsfound = True

            # we write SNPs with the bases separated by "/" not "|" because
            # we haven't phased them yet
            with open(args.output + '.snps', 'a+') as f_snp:
                for pos, v in vc.variant.items():
                    # pos + 1: presumably converts a 0-based position to
                    # 1-based for the report -- TODO confirm.
                    f_snp.write("{contig}\t{pos}\t{bases}\t{counts}\n".format(
                        contig=contig,
                        pos=pos + 1,
                        bases="/".join(b for (b, c) in v),
                        counts="/".join(str(c) for (b, c) in v)))

            # (2) for each CCS read, assign a haplotype (or discard if outlier)
            pp = VariantPhaser.MagVariantPhaser(vc)
            pp.phase_variant(args.bamfile, [contig, start, end],
                             args.output,
                             partial_ok=True)
            print(pp.haplotypes)
            pp.haplotypes.get_haplotype_vcf_assignment()
            pp.haplotypes.write_haplotype_to_humanreadable(contig, f_human1,
                                                           f_human2,
                                                           pp.seq_hap_info)
            os.remove(mpileupFile)

    if not snpsfound:
        os.system("touch {out}.NO_SNPS_FOUND".format(out=args.output))
        # Nothing in this function creates '.human_readable.txt' (it was
        # already deleted with the other past-run files above), so an
        # unconditional os.remove raised FileNotFoundError on this path.
        # Only remove it when it is actually there.
        # NOTE(review): the header-only *_by_pos/*_by_hap reports are left in
        # place -- confirm whether they should be removed here instead.
        stale_report = args.output + '.human_readable.txt'
        if os.path.exists(stale_report):
            os.remove(stale_report)
        print("No SNPs found. END.", file=sys.stderr)
示例#6
0
# Pull every record out of the pileup reader before variant calling.
recs = list(reader)
vc = VC.MPileUPVariant(
    recs,
    min_cov=MIN_COVERAGE,
    err_sub=ERR_SUB,
    expected_strand=args.strand,
    pval_cutoff=args.pval_cutoff,
)
vc.call_variant()
print(vc.variant)

# No variants were called: touch a marker file and exit with status 0.
if not len(vc.variant):
    no_snp_cmd = "touch {out}.NO_SNPS_FOUND".format(out=args.output_prefix)
    os.system(no_snp_cmd)
    print("No SNPs found. END.", file=sys.stderr)
    sys.exit(0)

# (2) for each CCS read, assign a haplotype (or discard if outlier)
pp = VariantPhaser.VariantPhaser(vc)
pp.phase_variant(
    args.sam_filename,
    args.fastx_filename,
    args.output_prefix,
    partial_ok=args.partial_ok,
)
# Bare attribute access whose result is discarded; kept as in the original.
pp.haplotypes
pp.haplotypes.get_haplotype_vcf_assignment()

# (3) phase isoforms -- not needed for this analysis!
# NOTE(review): the disabled code below is the only visible assignment of
# `isoform_tally`, which is still referenced by the write_haplotype_to_vcf
# call that follows -- confirm it is defined elsewhere.
#seqids = set([r.id for r in SeqIO.parse(open(args.fastx_filename), VariantPhaser.type_fa_or_fq(args.fastx_filename))])
#isoform_tally = VariantPhaser.phase_isoforms(args.read_stat, seqids, pp)
#if len(isoform_tally) == 0:
#    os.system("touch {out}.NO_HAPS_FOUND".format(out=args.output_prefix))
#    print("No good haps found. END.", file=sys.stderr)
#    sys.exit(0)
pp.haplotypes.write_haplotype_to_vcf(args.mapping_filename, isoform_tally,