示例#1
0
  # Validate the command-line inputs up front; sys.exit() with a string
  # prints the message to stderr and terminates with a non-zero status.
  if not os.path.isdir(args.jobdir):
    # Report the directory that was actually tested (args.jobdir).
    sys.exit('Error: directory "%s" does not exist' % args.jobdir)
  if not os.path.exists(args.reffile):
    sys.exit('Error: reference file "%s" does not exist' % args.reffile)

  # Work with absolute paths from here on.
  job_path       = os.path.abspath(args.jobdir)
  reference_file = os.path.abspath(args.reffile)

  # Load the reference FASTA records into a dict keyed by record id
  # (a later record with a duplicate id replaces the earlier one).
  seqs = {}
  for s in SeqIO.parse(reference_file,'fasta'):
    seqs[s.id] = s

  # One summary per reference sequence, each starting with its own empty
  # list of variants.
  summaries = dict((seq_name, {'variants':[]}) for seq_name in seqs)
  # GATK variants: each non-header line of the VCF is parsed with
  # Variant.from_vcf, tagged with caller 'gatk' and filed under the summary
  # of the sequence named by v.chrom.
  print >>sys.stderr, "[ Reading GATK variants ]"
  # Read inside a context manager so the file handle is closed promptly
  # instead of being left for the garbage collector.
  with open('%s/GATK/snps.gatk.vcf' % job_path,'rU') as vcf_handle:
    # Lines starting with '#' are skipped; only the trailing newline is stripped.
    vlines = [l.strip('\n') for l in vcf_handle if not l.startswith('#')]
  for l in vlines:
    v = Variant.from_vcf(l)
    v.caller = 'gatk'
    # NOTE(review): raises KeyError if v.chrom is not a sequence id from the
    # reference file -- confirm that is the intended failure mode.
    summaries[v.chrom]['variants'].append(v)

  # PacBio (GenCons) variants: each non-comment line of the gzipped GFF is
  # parsed with Variant.from_gff, tagged with caller 'gencons' and filed
  # under the summary of the sequence named by v.chrom.
  print >>sys.stderr, "[ Reading GenCons variants ]"
  # Read inside a context manager so the gzip handle is closed promptly
  # (gzip file objects support 'with' from Python 2.7 onwards).
  with gzip.open('%s/data/variants.gff.gz' % job_path,'rb') as gff_handle:
    # Lines starting with '#' are skipped; only the trailing newline is stripped.
    glines = [l.strip('\n') for l in gff_handle if not l.startswith('#')]
  for l in glines:
    v = Variant.from_gff(l)
    v.caller = 'gencons'
    # NOTE(review): raises KeyError if v.chrom is not a sequence id from the
    # reference file -- confirm that is the intended failure mode.
    summaries[v.chrom]['variants'].append(v)

  # Coverage variants: parse the coverage-depth file from the GATK
  # sub-directory of the job and start with an empty covvars mapping.
  print >>sys.stderr, "[ Reading coverage variants ]"
  covdepth_path = '%s/GATK/covdepth' % job_path
  covdata = parse_covdepth(covdepth_path)
  covvars = dict()