示例#1
0
# parser.add_argument('--json', default="/dev/null")
# Optional positional argument naming the file the summary table is written
# to; when it is omitted the table goes to standard output.
parser.add_argument(
    'summary',
    nargs='?',
    type=argparse.FileType('w'),
    default=sys.stdout
)

# Parse the command line into the `args` namespace used below.
args = parser.parse_args()

import json
from Bio import SeqIO
from postanalysis.variant import Variant

# Load references: `seqs` keeps the FASTA records as (id, record) pairs in
# file order, and `sdict` indexes the same records by id.
seqs = []
for record in SeqIO.parse(args.reffile, 'fasta'):
    seqs.append((record.id, record))
sdict = dict(seqs)

# Load variants: parse every VCF line that does not start with '#' and group
# the resulting Variant objects by the chrom value they report.
variants = {}
# The with-block closes the VCF handle as soon as the lines are read; the
# original left it open until garbage collection.
# NOTE(review): 'rU' (universal newlines) is kept so line-ending handling is
# unchanged under Python 2. Python 3.11+ rejects the 'U' flag, so drop it when
# this script is ported.
with open(args.vcffile, 'rU') as vcf_handle:
    vlines = [line.strip('\n') for line in vcf_handle
              if not line.startswith('#')]
for line in vlines:
    v = Variant.from_vcf(line)
    v.caller = 'gatk'  # label every record loaded here with this caller tag
    # setdefault creates the per-chrom list the first time a chrom is seen.
    variants.setdefault(v.chrom, []).append(v)

# Output summary information: a header row, then one tab-separated row per
# reference (in the order of `seqs`) with the number of variants loaded for it.
# file.write() is used instead of the Python 2-only `print >>file` form so the
# block produces the same text under Python 2 and Python 3.
args.summary.write('reference\tvariants\n')
for ref, seq in seqs:
    # References without any variant record are still listed, with a count of 0.
    args.summary.write('%s\t%d\n' % (ref, len(variants.get(ref, []))))
示例#2
0
        )


# Load references: `seqs` holds the FASTA records as (id, record) pairs in
# file order; `sdict` maps each id to a fresh Reference object that the
# sections below fill in.
seqs = []
for record in SeqIO.parse(args.reffile, 'fasta'):
    seqs.append((record.id, record))
sdict = {}
for name, _record in seqs:
    sdict[name] = Reference(name=name)
# Summarize coverage data: parse the coverage file (the helper is given the
# length of every reference), then record per-reference coverage statistics
# and any no-coverage variants on the matching Reference object.
reflens = {}
for name, record in seqs:
    reflens[name] = len(record)
covdata = parse_covdepth_samtools(args.covfile, reflens=reflens)

for ref, seq in seqs:
    depth = covdata[ref]
    pct, mean = summarize_coverage(depth)
    entry = sdict[ref]
    entry.pct_cov = pct
    entry.mean_cov = mean
    ncvars = find_nocov_variants(depth, chrom=ref, caller='samdepth')
    # The helper may return None; only extend when it returned something.
    if ncvars is not None:
        entry.dips.extend(ncvars)
# Analyze variants: read the gzip-compressed GFF file, skip lines starting
# with '#', and attach each parsed Variant to the Reference named by its chrom.
# The with-block closes the gzip handle once the lines are read; the original
# never closed it.
with gzip.open(args.gfffile, 'rb') as gff_handle:
    glines = [
        line.strip('\n') for line in gff_handle
        if not line.startswith('#')
    ]
for line in glines:
    v = Variant.from_gff(line)
    v.caller = 'gencons'  # label every record loaded here with this caller tag
    # Raises KeyError if the GFF names a chrom that is not among the loaded
    # references (same as the original behaviour).
    sdict[v.chrom].variants.append(v)
# Output summary information: a header row, then one tab-separated row per
# reference made of the Reference's own summary() text followed by the value
# returned by make_call() for that reference's sequence record.
# file.write() is used instead of the Python 2-only `print >>file` form so the
# block produces the same text under Python 2 and Python 3.
args.summary.write('ref\tpct_cov\tmean_cov\tnvars\tndips\tcall\n')
for ref, seq in seqs:
    call = sdict[ref].make_call(seq)
    args.summary.write('%s\t%s\n' % (sdict[ref].summary(), call))
    return '%s\t%.1f\t%.1f\t%d\t%d' % (self.name, 
                                             self.pct_cov*100, self.mean_cov, 
                                             len(self.variants), len(self.dips), 
                                       )

# Load references: collect the FASTA records as (id, record) pairs in file
# order, then create one Reference object per id for the later sections.
seqs = []
for fasta_record in SeqIO.parse(args.reffile, 'fasta'):
    seqs.append((fasta_record.id, fasta_record))
sdict = {}
for ref_name, _unused in seqs:
    sdict[ref_name] = Reference(name=ref_name)

# Summarize coverage data: hand the coverage file and the reference lengths
# to the parser, then store the two coverage statistics and any no-coverage
# variants on each Reference.
ref_lengths = dict((name, len(record)) for name, record in seqs)
covdata = parse_covdepth_samtools(args.covfile, reflens=ref_lengths)

for ref, seq in seqs:
    ref_cov = covdata[ref]
    target = sdict[ref]
    target.pct_cov, target.mean_cov = summarize_coverage(ref_cov)
    ncvars = find_nocov_variants(ref_cov, chrom=ref, caller='samdepth')
    # find_nocov_variants may return None, in which case nothing is added.
    if ncvars is not None:
        target.dips.extend(ncvars)

# Analyze variants: read the gzip-compressed GFF file, drop lines starting
# with '#', and append each parsed Variant to the Reference named by its chrom.
# Using the handle as a context manager closes it once the lines are read; the
# original left the gzip file open.
with gzip.open(args.gfffile, 'rb') as gff_handle:
    glines = [line.strip('\n') for line in gff_handle
              if not line.startswith('#')]
for line in glines:
    v = Variant.from_gff(line)
    v.caller = 'gencons'  # label every record loaded here with this caller tag
    # A chrom missing from the loaded references raises KeyError, as before.
    sdict[v.chrom].variants.append(v)

# Output summary information: a header row, then for every reference one
# tab-separated row made of its summary() text and its make_call() result.
# file.write() replaces the Python 2-only `print >>file` form, so the same
# text is produced under Python 2 and Python 3.
args.summary.write('ref\tpct_cov\tmean_cov\tnvars\tndips\tcall\n')
for ref, seq in seqs:
    call = sdict[ref].make_call(seq)
    args.summary.write('%s\t%s\n' % (sdict[ref].summary(), call))