# Validate the command-line inputs before doing any work.
if not os.path.isdir(args.jobdir):
    # Report the path that was actually tested (args.jobdir).
    sys.exit('Error: directory "%s" does not exist' % args.jobdir)
if not os.path.exists(args.reffile):
    sys.exit('Error: reference file "%s" does not exist' % args.reffile)

job_path = os.path.abspath(args.jobdir)
reference_file = os.path.abspath(args.reffile)

# Load the reference sequences, keyed by record id.
seqs = {s.id: s for s in SeqIO.parse(reference_file, 'fasta')}

# One summary per reference sequence; variants from every caller are
# appended to the same 'variants' list.
# NOTE(review): a variant whose chrom is not a reference id raises KeyError
# below -- confirm that is the intended failure mode.
summaries = {name: {'variants': []} for name in seqs}

# GATK variants
print >>sys.stderr, "[ Reading GATK variants ]"
with open('%s/GATK/snps.gatk.vcf' % job_path, 'rU') as vcf_handle:
    # Skip header/comment lines, which start with '#'.
    vlines = [l.strip('\n') for l in vcf_handle if not l.startswith('#')]
for l in vlines:
    v = Variant.from_vcf(l)
    v.caller = 'gatk'
    summaries[v.chrom]['variants'].append(v)

# PacBio variants
print >>sys.stderr, "[ Reading GenCons variants ]"
with gzip.open('%s/data/variants.gff.gz' % job_path, 'rb') as gff_handle:
    # Skip header/comment lines, which start with '#'.
    glines = [l.strip('\n') for l in gff_handle if not l.startswith('#')]
for l in glines:
    v = Variant.from_gff(l)
    v.caller = 'gencons'
    summaries[v.chrom]['variants'].append(v)

# coverage variants
print >>sys.stderr, "[ Reading coverage variants ]"
covdata = parse_covdepth('%s/GATK/covdepth' % job_path)
covvars = {}