示例#1
0
	if re.search(r"Not required", error) == None and error != 'chr0:0 .. REF allele listed in the ALT field??' and error != '':
		raise Exception('ERROR: vcf [' + vcf_path + '] not valid [' + error + ']')
'''

# The VCF must match the platform definition exactly - no extra or missing records.
# CHROM, POS, ID, REF and ALT must match.
# Both files must be in the same sort order - this requirement may be relaxed later.
# The samples must also match between the summary and the VCF.


# str.format-style message templates for VCF-vs-PLATFORM mismatches.
# err1 placeholders: {0} VCF file, {1} PLATFORM file, {2} what differs,
# {3} variant record, {4} value from the VCF, {5} value from the PLATFORM file.
err1 = (
	"ERROR: VCF file [{0}] does not match PLATFORM file [{1}] on {2} at variant record {3}. VCF={4}; PLATFORM={5}"
)
# err2 placeholders: {0} VCF file, {1} PLATFORM file.
err2 = (
	"ERROR: VCF file [{0}] does not match PLATFORM file [{1}]. Different line counts."
)
# Names of the variant fields listed in the "must match" requirements.
fields = "CHROM,POS,ID,REF,ALT".split(",")
# Confirm that the VCF and the summary file list the same samples, in the same order.
with gzip.open(vcf_path, 'rb') as vcf, gzip.open(summary_path, 'rb') as summary:
	#remove header comments from each file and capture header row
	vcfhead = bankfunctions.read_through_headers(vcf,'##')
	#platformhead = bankfunctions.read_through_headers(platform,'##')
	summaryhead = bankfunctions.read_through_headers(summary,'#')

	#check samples match between vcf and summary to ensure correct summary file selected (assume posterior file matches by virtue of being in same directory)
	vcf_samples = vcfhead.strip().split('\t')
	# Raw string: "\." in a plain string literal is an invalid escape sequence.
	pattern = re.compile(r"\.cel", re.IGNORECASE)
	summary_samples = summaryhead.strip().split('\t')
	# The first summary column is skipped (presumably a row identifier - TODO confirm);
	# ".cel" (any case) is removed from the remaining column names.
	summary_samples_no_cel = [pattern.sub("", sample) for sample in summary_samples[1:]]
	# VCF sample columns start at index 9, after the nine fixed VCF columns.
	if vcf_samples[9:] != summary_samples_no_cel:
		raise Exception('ERROR: vcf [' + vcf_path + '] samples do not match samples in summary [' + summary_path + ']')
	# Indented with a tab like the rest of the block (was 8 spaces, which
	# Python 3 rejects as inconsistent use of tabs and spaces).
	sample_count = len(summary_samples_no_cel)
'''
# PENDING: Commenting out due to issues with sorted vs not sorted files.  Can safely assume correctly formed at the moment, as all input through GXBANK_CONVERT script	
	#check variants match between vcf and platform
	var_count = 0
示例#2
0
##
def filter_num(key, value, criteria):
	"""Return *key* when *value* is a number strictly below *criteria*.

	Missing values ('NA' or the empty string) never pass the filter.
	Any other value is converted with float(); the conversion error for a
	value that cannot be converted is not caught here.

	Returns:
		*key* if the value passes, otherwise None.
	"""
	if value != 'NA' and value != '' and float(value) < criteria:
		return key
	return None

##
# FILTER Thresholds
##
# homRO filter cut-offs, one entry per key '0'..'3'.
# NOTE(review): what the keys stand for is not visible here - confirm against the code that reads this table.
thresholds_homRO = {
	'0': 0.6,
	'1': 0.6,
	'2': 0.3,
	'3': -0.9,
}

with gzip.open(callFile, 'rb') as calls, \
	gzip.open(confFile, 'rb') as confs, \
     gzip.open(perfFile, 'rb') as perfs, \
	  gzip.open(platformVCF_path,"rb") as plat_vcf, \
	  gzip.open(vcfFileUnsorted,"wb") as vcf :
	  
	#Remove headers
	bankfunctions.read_through_headers(perfs,'#')
	bankfunctions.read_through_headers(plat_vcf,'##')
	#Confirm order of samples in each Axiom file matches
	call = bankfunctions.read_through_headers(calls,'#').strip().split('\t')
	conf = bankfunctions.read_through_headers(confs,'#').strip().split('\t')
	#if len(call) != len(conf) or len(conf) != len(sum) :
	if len(call) != len(conf) :
		raise Exception(err4.format(axiomdir,'AxiomGT1.calls.txt.gz',len(call) - 1,'AxiomGT1.confidences.txt.gz',len(conf) - 1))
	for i in range(1,len(call)) :
		#if call[i] != conf[i] or conf[i] != sum[i] :
		if call[i] != conf[i] :
			raise Exception(err5.format(axiomdir,i,'AxiomGT1.calls.txt.gz',call[i],'AxiomGT1.confidences.txt.gz',conf[i]))
	
   #log number of samples
	logging.info('[%d] samples found in these Axiom files', len(call) - 1)