示例#1
0
  # Index every record parsed from reference_file (FASTA format) by its id,
  # so a sequence can be looked up by reference name later on.
  seqs = dict( [(s.id,s) for s in SeqIO.parse(reference_file,'fasta')] )

  # One summary dict per reference name, each starting with an empty
  # 'variants' list; the variant-reading sections below append to these lists.
  summaries = dict(( (name,{'variants':[]}) for name in seqs.keys()))
  ''' GATK variants '''
  print >>sys.stderr, "[ Reading GATK variants ]"
  # Load the whole VCF into memory, skipping lines that start with '#'.
  # strip('\n') removes newline characters only, so tabs/spaces inside a
  # record are preserved. 'rU' is Python 2 universal-newline read mode.
  # NOTE(review): the file object is never explicitly closed; it is only
  # released when garbage-collected.
  vlines = [l.strip('\n') for l in open('%s/GATK/snps.gatk.vcf' % job_path,'rU') if not l.startswith('#')]
  for l in vlines:
    # Build a Variant from the raw VCF line (Variant.from_vcf is defined
    # elsewhere) and tag it with the caller that produced it.
    v = Variant.from_vcf(l)
    v.caller = 'gatk'
    # File the variant under its reference name. summaries is a plain dict,
    # so a v.chrom that is not one of its keys raises KeyError here.
    summaries[v.chrom]['variants'].append(v)

  ''' PacBio variants '''
  # NOTE(review): the section label says "PacBio" while the log message and
  # caller tag say "GenCons" -- presumably the same variant source; confirm.
  print >>sys.stderr, "[ Reading GenCons variants ]"
  # Load the gzip-compressed GFF into memory, skipping lines that start with
  # '#'. strip('\n') removes newline characters only.
  # NOTE(review): the gzip handle is never explicitly closed. Also, under
  # Python 3 mode 'rb' yields bytes, so startswith('#') would need b'#'.
  glines = [l.strip('\n') for l in gzip.open('%s/data/variants.gff.gz' % job_path,'rb') if not l.startswith('#')]
  for l in glines:
    # Build a Variant from the raw GFF line (Variant.from_gff is defined
    # elsewhere) and tag it with the caller that produced it.
    v = Variant.from_gff(l)
    v.caller = 'gencons'
    # Appended to the same per-reference list as the GATK variants; a v.chrom
    # that is not a key of summaries raises KeyError here.
    summaries[v.chrom]['variants'].append(v)

  ''' coverage variants '''
  print >>sys.stderr, "[ Reading coverage variants ]"
  # parse_covdepth is defined elsewhere; the loop below uses its result as a
  # mapping keyed by reference name.
  covdata = parse_covdepth('%s/GATK/covdepth' % job_path)
  # Per-reference container for coverage-derived variants; an entry is created
  # only for references whose find_variants result has a 'variants' key.
  covvars = {}
  for ref in covdata.keys():
    # Every reference in the coverage data must also be a key of summaries
    # (i.e. one of the FASTA record ids).
    # NOTE(review): assert is stripped under `python -O`; in that case an
    # unknown ref would surface as a KeyError from seqs[ref] on the next line.
    assert ref in summaries, "Error: ref %s is not in summaries" % ref
    # NOTE(review): the meaning of exclude_edges / exclude_overlaps is defined
    # by find_variants, which is not visible here.
    result = find_variants(covdata[ref],seqs[ref],ref,exclude_edges=True,exclude_overlaps=True)
    # Copy the coverage statistics into this reference's summary.
    summaries[ref]['mean_cov'] = result['mean_cov']
    summaries[ref]['pct_cov'] = result['pct_cov']
    # 'variants' is treated as an optional key of result.
    if 'variants' in result:
      # Start an empty bucket for this reference, then walk the variants
      # reported by find_variants.
      covvars[ref] = {'variants':[]}
      for v in result['variants']: