示例#1
0
文件: stmp.py 项目: AshleyLab/stmp
		def pgx(args, output_dir=None):
			"""Run the pharmacogenomics (pgx) annotation step on ``args.vcf``.

			Calls ``pgxUtils.pgx_annotator`` and ``pgxUtils.star_caller`` on the
			input VCF, passing both the same ``<output>/pharmacogenomics`` path.

			Args:
				args: Argument namespace. ``args.vcf`` is read, and
					``args.output_dir`` is read when ``output_dir`` is omitted.
					``args.output`` is overwritten with the directory used here.
				output_dir: Directory to write into. When None, a ``pgx``
					subdirectory of ``args.output_dir`` is used.

			Raises:
				OSError: If the output directory is missing and cannot be created.
			"""
			# Resolve the output directory and record it on args.
			if output_dir is None:
				args.output = os.path.join(args.output_dir, 'pgx')
			else:
				args.output = output_dir

			# Create first and check afterwards, so that another process creating
			# the same directory in between does not make this step fail.
			try:
				os.makedirs(args.output)
			except OSError:
				if not os.path.isdir(args.output):
					raise

			# Single-argument parenthesized print behaves the same on Python 2 and 3.
			print(str(datetime.datetime.now()) + ': Performing PharmGKB + ClinVar (pgx) annotation')

			# Both pgxUtils calls receive the same output path.
			# NOTE(review): presumably used as a filename prefix - confirm in pgxUtils.
			pgx_out_stem = os.path.join(args.output, "pharmacogenomics")

			# PharmGKB annotation
			pgxUtils.pgx_annotator(args.vcf, os.path.join(resources_path, "pgx_vars", "clinical_ann_metadata-snvs.txt"), pgx_out_stem)
			pgxUtils.star_caller(os.path.join(resources_path, "pgx_haps/"), args.vcf, pgx_out_stem)

			print(str(datetime.datetime.now()) + ': Done with pgx/clinvar annotation')
示例#2
0
	def pgx(args):
		"""Run the pharmacogenomics (pgx) annotation step on ``args.vcf``.

		Calls ``pgxUtils.pgx_annotator`` and ``pgxUtils.star_caller`` on the
		input VCF, passing both the same ``<args.output_dir>/pharmacogenomics``
		path.

		Args:
			args: Argument namespace. ``args.vcf`` and ``args.output_dir`` are
				read; ``args.output`` is overwritten with ``args.output_dir``.

		NOTE: This function previously carried a large amount of commented-out
		code, removed here as dead code. It covered targeted SNV/indel
		genotyping from ``args.input_bam`` (target_snvs.sh / target_indels.sh),
		rsid re-annotation and merging of pgx/clinvar calls, and Stanovar-based
		ClinVar annotation and tiering. None of that is executed; only the
		VCF-based PharmGKB path below runs.
		"""
		# The output directory is used as given; it is not created here.
		args.output = args.output_dir

		# Single-argument parenthesized print behaves the same on Python 2 and 3.
		print(str(datetime.datetime.now()) + ': Performing PharmGKB + ClinVar (pgx) annotation')

		# Both pgxUtils calls receive the same output path.
		# NOTE(review): presumably used as a filename prefix - confirm in pgxUtils.
		pgx_out_stem = os.path.join(args.output, "pharmacogenomics")

		# PharmGKB annotation
		pgxUtils.pgx_annotator(args.vcf, os.path.join(resources_path, "pgx_vars", "clinical_ann_metadata-snvs.txt"), pgx_out_stem)
		pgxUtils.star_caller(os.path.join(resources_path, "pgx_haps/"), args.vcf, pgx_out_stem)

		print(str(datetime.datetime.now()) + ': Done with pgx/clinvar annotation')