def get_args():
    """Parse the command line of the genome download script.

    Publishes ``fi``, ``properties_file``, ``prop``, ``misc`` and
    ``genome_name`` as module globals, checks that the properties file
    exists and that the requested genome passes ``check_genome_avl``,
    then echoes the selected values.
    """
    global fi
    global prop
    global misc
    global properties_file
    global genome_name

    fi = fileutils()

    # Assign description to the help doc
    cli = argparse.ArgumentParser(
        description='Script downloading genome files')
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='''Please provide the properties file, which including workdir''')
    cli.add_argument(
        '-g', '--genome_name',
        required=True,
        type=str,
        help='''Please provide the genome name which is provided by genome_list.txt''')
    parsed = cli.parse_args()

    # check args
    fi.check_exist(parsed.properties_file)
    properties_file = parsed.properties_file
    prop = properties(properties_file)
    # The global name ``misc`` is rebound to the object returned by calling it.
    misc = misc()
    misc.check_genome_avl(prop.get_attrib("available_genomes"),
                          parsed.genome_name)

    # define variables
    genome_name = parsed.genome_name
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
def get_args():
    """Parse the command line of the multiQC report script.

    Publishes ``fi``, ``properties_file``, ``prop`` and ``prefix`` as
    module globals after checking that the properties file exists, then
    echoes the selected values.
    """
    global prop
    global properties_file
    global prefix
    global fi

    fi = fileutils()

    # Assign description to the help doc
    cli = argparse.ArgumentParser(
        description='''Script creates multiQC html file using fastqc, bcftools, snpEff, QUAST, and QualiMap output files''')
    cli.add_argument(
        '-p', '--properties_file',
        required=True,
        type=str,
        help='Please provide the properties file, which including the paths of workdir')
    cli.add_argument(
        '-pre', '--prefix',
        required=True,
        type=str,
        help='Please provide the prefix for the output file')

    # check args
    parsed = cli.parse_args()
    fi.check_exist(parsed.properties_file)

    # define variables
    properties_file = parsed.properties_file
    prop = properties(properties_file)
    prefix = parsed.prefix
    print("properties_file:", str(properties_file))
    print("prefix:", prefix)
def get_args(): global properties_file global genome global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script invests genes under selection pressure within species through dNdS. Species can be chosen from genome_list.txt''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from genome_list.txt''', required=True) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide snp vcf files' pattern with full file path", required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) if args.genome_name not in ( line.rstrip() for line in open(prop.get_attrib("available_genomes")).readlines() ) and args.genome_name != "cryptosporidium_hominis": misc.my_exit("{} is not available, please try another genome".format( args.genome_name)) if not re.search(".vcf", args.vcf_file_pattern): misc.my_exit("vcf_file_pattern need to end up with .vcf") genome = args.genome_name vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix
def get_args():
    """Parse and validate the command line of the chromosome alignment script.

    Publishes ``fi``, ``prop``, ``properties_file``, ``genome_name``,
    ``bam_file_pattern``, ``bam_files``, ``bam_key_pattern``,
    ``mapping_file`` and ``prefix`` as module globals.

    Validation:
      * the properties file must exist;
      * the genome name must be listed in the file named by the
        ``available_genomes`` property;
      * the bam file pattern must end with ``.bam``;
      * every globbed bam file name must contain a run ID matching
        ``bam_key_pattern`` and must exist.

    Failures are reported through ``misc.my_exit``.
    """
    global fi
    global prop
    global properties_file
    global genome_name
    global bam_file_pattern
    global bam_files
    global mapping_file
    global prefix
    global bam_key_pattern

    # Assign description to the help doc
    parser = argparse.ArgumentParser(
        description='Script build all individual chromosome multiple alignment for recombination')
    parser.add_argument('-p', '--properties_file', type=str,
                        help='Please provide the properties file.',
                        required=True)
    parser.add_argument(
        '-g', '--genome_name', type=str,
        help='''Please provide the genome name available in genome_list.txt only''',
        required=True)
    parser.add_argument(
        '-bp', '--bam_file_pattern', type=str,
        help='''Please provide the bam files' pattern with the full path, ending with .bam, with runID in the bam file name''',
        required=True)
    parser.add_argument(
        '-m', '--mapping_file', type=str,
        help='''Please provide the mapping file path, containing one column of the runID and the other column is the expression displayed in the multiple alignment file description line''',
        required=False)
    parser.add_argument('-pre', '--prefix', type=str,
                        help='Please provide the prefix for the output file.',
                        required=True)

    # check args
    args = parser.parse_args()
    fi = fileutils()
    fi.check_exist(args.properties_file)
    properties_file = args.properties_file
    prop = properties(properties_file)

    # Read the genome list inside a context manager so the handle is closed.
    with open(prop.get_attrib("available_genomes")) as genome_list:
        available_genomes = set(line.rstrip() for line in genome_list)
    if args.genome_name not in available_genomes:
        misc.my_exit("{} is not available, please try another genome".format(
            args.genome_name))

    # Escape the dot so only a literal ".bam" suffix is accepted.
    if not re.search(r"\.bam$", args.bam_file_pattern):
        misc.my_exit("bam_file_pattern need to end up with .bam")
    bam_file_pattern = args.bam_file_pattern
    bam_files = glob.glob(bam_file_pattern)

    # Raw string: same pattern text as before, without the invalid "\d"
    # escape that a normal string literal warns about.
    bam_key_pattern = r"[A-Z]RR\d{6,}"
    for bam_file in bam_files:
        if not re.search(bam_key_pattern, bam_file):
            misc.my_exit(
                "There is no runID in the bam file {}".format(bam_file))
    fi.check_files_exist(bam_files)

    # define variables
    genome_name = args.genome_name
    mapping_file = args.mapping_file
    prefix = args.prefix
    print("properties_file:", properties_file)
    print("genome_name:", genome_name)
    print("bam_file_pattern:", bam_file_pattern)
    print("mapping_file:", mapping_file)
    print("prefix:", prefix)
def initiate():
    """Create the per-analysis processing directories under ``prop.workdir``.

    Reads the module-level ``prop`` and calls
    ``fileutils.create_processing_dir`` once per sub-directory, in the
    order listed below.
    """
    fi = fileutils()
    # The former local ``dir`` was never used and shadowed the builtin.
    subdirs = (
        "quality",
        "assembly",
        "reference-mapping",
        "SNPs",
        "structural-recombination",
        "hyper-variable-analysis",
        "cluster-analysis",
    )
    for subdir in subdirs:
        fi.create_processing_dir(prop.workdir + "/" + subdir)
def __init__(self, properties_file, genome_name, genome_fasta, bam_file,
             prefix, if_anno, subdir):
    """Store the run parameters and build the helper objects.

    Every argument is kept on the instance under the same name. In
    addition ``self.prop`` is built from ``properties_file`` via
    ``properties(...)`` and ``self.fi`` is a fresh ``fileutils()``.
    """
    self.properties_file = properties_file
    # Parsed view of the properties file.
    self.prop = properties(properties_file)

    # Remaining parameters are stored exactly as passed in.
    (self.genome_name,
     self.genome_fasta,
     self.bam_file,
     self.prefix,
     self.if_anno,
     self.subdir) = (genome_name,
                     genome_fasta,
                     bam_file,
                     prefix,
                     if_anno,
                     subdir)

    self.fi = fileutils()
def post_process(self):
    """Copy the read files and their trimming outputs into ``quality/in/``.

    Creates ``<workdir>/quality/in/`` and ``<workdir>/quality/out/``, then
    copies ``self.fq1``, ``self.fq2``, the two ``*_val_N.fq`` files and
    the ``*_trimming_report.txt`` file into the ``in`` directory.
    """
    fi = fileutils()
    indir = self.prop.workdir + "/quality/in/"
    outdir = self.prop.workdir + "/quality/out/"
    for directory in (indir, outdir):
        fi.create_processing_dir(directory)

    # Original read files first.
    fi.copy_src_into_dest(self.fq1, indir)
    fi.copy_src_into_dest(self.fq2, indir)

    # Then the derived files, named after the read files.
    # NOTE(review): everything is copied into ``indir``; ``outdir`` is only
    # created here - confirm this is intended.
    derived_files = (
        self.fq1 + "_val_1.fq",
        self.fq2 + "_val_2.fq",
        self.fq1 + "_trimming_report.txt",
    )
    for derived in derived_files:
        fi.copy_src_into_dest(derived, indir)
def get_args(): global properties_file global g_names_str global fi global prop global min_homo # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure among multiple species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_names', type=str, help='''Please provide the genome names, seperating by "," with the format of XXX, YYY, ZZZ''', required=True) parser.add_argument( '-min', '--min_homo', type=int, help= '''Please provide the minimum poteintial homologue numbers in one group, if not defined, 4 will be used as the default''', required=False) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) # define variables properties_file = args.properties_file prop = properties(properties_file) g_names_str = args.genome_names if args.min_homo is None: min_homo = 4 else: min_homo = args.min_homo print "properties_file:", properties_file print "gnames:", g_names_str print "min_homo:", str(min_homo)
def get_args(): global fi global prop global properties_file global genome_fasta global bam_file_pattern global bam_request_pattern global bam_files global map_fpath global map_dict global prefix # Assign description to the help doc parser = argparse.ArgumentParser(description='''Script creating relocation files for multiple bam files from various genomes and automatically open the GUI''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-m', '--map_file', type=str, help='''Please provide the map file, in which the first column is the full path of the genome fasta file and the second column is the full path of the bam file and the bam files need to ended with .bam''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi=fileutils() fi.check_files_exist([args.properties_file,args.map_file]) # define variables properties_file=args.properties_file prop=properties(properties_file) map_fpath=args.map_file fh_map=open(map_fpath, "r") map_dict={} for line in fh_map: line=line.rstrip() (fasta_fpath,bam_fpath)=getVar(line.split(),[0,1]) fi.check_files_exist([fasta_fpath,bam_fpath]) map_dict[bam_fpath]=fasta_fpath prefix=args.prefix print "properties_file:",properties_file print "map_file:",map_fpath print "prefix:",prefix
def initiate():
    """Create the reference-mapping work directories and stage the fastq input.

    Publishes ``subdir``, ``workdir``, ``indir``, ``outdir`` and ``qcdir``
    as module globals. Reads the module-level ``prop``, ``prefix_ori``,
    ``fastq1``, ``fastq2`` and ``FI``. ``fastq2`` is copied only when it
    is not ``None``.
    """
    print("initiating...")
    global indir
    global outdir
    global workdir
    global subdir
    global qcdir

    subdir = "reference_mapping"
    workdir = prop.workdir + "/" + subdir
    indir = workdir + "/in/"
    outdir = workdir + "/out/"
    qcdir = workdir + "/qc/" + prefix_ori

    # All file operations go through the module-level ``FI``; the local
    # ``fi = fileutils()`` that used to be created here was never used.
    for directory in (workdir, indir, outdir, qcdir):
        FI.create_processing_dir(directory)

    FI.copy_file_to_destdir(fastq1, indir)
    if fastq2 is not None:
        FI.copy_file_to_destdir(fastq2, indir)
def get_args(): global properties_file global cds_fna1 global cds_faa1 global cds_fna2 global cds_faa2 global genome1 global genome2 global map_file global filter_eval global filter_identity global prefix global fi global makeblastdb_sw global blastn_sw global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure between two species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g1', '--genome_name1', type=str, help='''Please provide the first genome name, otherwise, "ch" for "C. hominis" will be used''', required=False) parser.add_argument( '-g2', '--genome_name2', type=str, help='''Please provide the second genome name, otherwise, "cp" for "C. parvum" will be used''', required=False) parser.add_argument( '-fn1', '--cds_fna1', type=str, help= 'Please provide the first cds fna file, otherwise, ch fna file will be used.', required=False) parser.add_argument( '-fn2', '--cds_fna2', type=str, help= 'Please provide the second cds fna file, otherwise, cp fna file will be used.', required=False) parser.add_argument( '-fa1', '--cds_faa1', type=str, help= 'Please provide the first cds faa file, otherwise, ch faa file will be used.', required=False) parser.add_argument( '-fa2', '--cds_faa2', type=str, help= 'Please provide the second cds faa file, otherwise, cp faa file will be used.', required=False) parser.add_argument( '-m', '--map', type=str, help='''Please provide the file for mapping the chromosome accessions, one pair in each line and separated by tab, otherwise, no chromosome information will be provided in the output file''', required=False) parser.add_argument( '-fi', '--filter_identity', type=str, help='the identity percentage for filtering the blast hits.', required=False) parser.add_argument('-fe', '--filter_eval', type=str, help='the eval for filtering the blast hits.', 
required=False) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) for opt_arg_fpath in (args.cds_fna1, args.cds_fna2, args.cds_faa1, args.cds_faa2, args.map): if opt_arg_fpath is not None: fi.check_exist(opt_arg_fpath) # define variables makeblastdb_sw = "makeblastdb" blastn_sw = "blastn" default_gname1 = "ch" default_gname2 = "cp" map_file = "None" filter_eval = "0" filter_identity = "0" filter_length = "0" properties_file = args.properties_file prop = properties(properties_file) if args.genome_name1 is not None: genome1 = args.genome_name1 else: genome1 = default_gname1 if args.genome_name2 is not None: genome2 = args.genome_name2 else: genome2 = default_gname2 if args.cds_fna1 is not None: cds_fna1 = args.cds_fna1 else: cds_fna1 = prop.get_attrib(genome1 + "_cds_fna") if args.cds_faa1 is not None: cds_faa1 = args.cds_faa1 else: cds_faa1 = prop.get_attrib(genome1 + "_cds_faa") if args.cds_fna2 is not None: cds_fna2 = args.cds_fna2 else: cds_fna2 = prop.get_attrib(genome2 + "_cds_fna") if args.cds_faa2 is not None: cds_faa2 = args.cds_faa2 else: cds_faa2 = prop.get_attrib(genome2 + "_cds_faa") if args.map is not None: map_file = args.map if args.filter_eval is not None: filter_eval = args.filter_eval if args.filter_identity is not None: filter_identity = args.filter_identity prefix = args.prefix print "properties_file:", properties_file print "genome1:", genome1 print "genome2:", genome2 print "cds_fna1:", cds_fna1 print "cds_faa1:", cds_faa1 print "cds_fna2:", cds_fna2 print "cds_faa2:", cds_faa2 print "filter_eval:", filter_eval print "filter_identity_perc:", filter_identity print "prefix:", prefix
# NOTE(review): the statements down to the ``__main__`` guard appear to be the
# tail of a function whose ``def`` line is outside this view; the nesting shown
# here is reconstructed from the statement order - confirm against the
# original file.
# Copy each file in ``tg_out_files`` to ``outdir`` with ``prefix + "_"`` added.
for tg_out_file in tg_out_files:
    FI.copy_file_add_prefix(tg_out_file, outdir, prefix + "_")
# Copy each fastqc output file and its ".html" sibling (same name with ".zip"
# replaced) to ``qcdir``.
for fastqc_out_file in fastqc_out_files:
    FI.copy_file_to_destdir(fastqc_out_file, qcdir)
    FI.copy_file_to_destdir(fastqc_out_file.replace(".zip", ".html"), qcdir)
# Copy "<multiqc_out_prefix>.html" to ``qcdir`` with a ``cp -p`` shell command.
command("cp -p {}.html {}".format(multiqc_out_prefix, qcdir)).run_comm(0)
# The de-duplicated fastq files are copied only when ``if_dedup`` is set.
if if_dedup:
    for deduped_fq in deduped_fqs:
        FI.copy_file_add_prefix(deduped_fq, outdir, prefix + "_")


# Script entry point: create the shared helpers, parse the arguments, then run
# the initiate / execute / post_process stages in order.
if __name__ == '__main__':
    global FI
    global MISC
    FI = fileutils()
    MISC = misc()
    get_args()
    print("\n", "Properties attributes:\n", prop.__dict__)
    # run the initiation code
    initiate()
    # execute the main part of the program
    execute()
    # post execution code
    post_process()
def get_args(): global properties_file global fastq1 global fastq2 global qc_sw global rm_dup_sw global prefix global fi global if_dedup fi = fileutils() default_qc_sw = "trim_galore" rm_dup_sw = "clumpify" # Assign description to the help doc parser = argparse.ArgumentParser( description='Script assembles short reads based on some criteria') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-fq1', '--fastq1', type=str, help='Please provide the first fastq file.', required=True) parser.add_argument('-fq2', '--fastq2', type=str, help='Please provide the second fastq file.', required=False) parser.add_argument('-qc_sw', '--qc_software', type=str, help='''Please provide the quality control software, otherwise the default trim_galore will be used''', required=False) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file', required=True) parser.add_argument('-de', '--dedup', action='store_true', help='if remove all the exact read duplications', default=False) # check args args = parser.parse_args() fi.check_files_exist([args.properties_file, args.fastq1]) if args.fastq2 is not None: fi.check_exist(args.fastq2) # define variables properties_file = args.properties_file fastq1 = args.fastq1 fastq2 = args.fastq2 qc_sw = args.qc_software if qc_sw is None: qc_sw = default_qc_sw prefix = args.prefix if_dedup = args.dedup print "properties_file:", properties_file print "fastq1:", fastq1 print "fastq2:", fastq2 print "qc_software:", qc_sw print "prefix:", prefix print "dedup:", if_dedup
def get_args(): global fi global prop global properties_file global genome_name global vcf_file_pattern global vcf_files global mapping_file global image_title global prefix # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script build phylogenetic tree and dendragram for the defined group of vcf files from the same genome' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from genome_list.txt''', required=True) parser.add_argument( '-v', '--vcf_file_pattern', type=str, help='''Please provide the vcf files' pattern with the full path, vcf files must ended with ".vcf" ''', required=True) parser.add_argument( '-m', '--mapping_file', type=str, help= '''Please provide the mapping file path, which contains one column of read_ID from vcf file and one column of its corresponding label on the tree branch, otherwise, the read_ID will be labeled on the tree branch''', required=False) parser.add_argument('-t', '--title', type=str, help='''Please provide the title of the image''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) if args.genome_name not in ( line.rstrip() for line in open(prop.get_attrib("available_genomes")).readlines() ) and args.genome_name != "cryptosporidium_hominis": misc.my_exit("{} is not available, please try another genome".format( args.genome_name)) if not re.search(".vcf$", args.vcf_file_pattern): misc.my_exit("vcf_file_pattern need to end up with .vcf") vcf_file_pattern = args.vcf_file_pattern vcf_files = glob.glob(vcf_file_pattern) fi.check_files_exist(vcf_files) # define 
variables genome_name = args.genome_name mapping_file = args.mapping_file image_title = args.title prefix = args.prefix print "properties_file:", properties_file print "genome_name:", genome_name print "vcf_file_pattern:", vcf_file_pattern print "mapping_file:", mapping_file print "title:", image_title print "prefix:", prefix
# NOTE(review): this call appears to be the last statement of a function whose
# ``def`` line is outside this view - confirm its nesting against the
# original file.
run_snpEff()


def post_process():
    """Copy the snpEff outputs into ``outdir``.

    The summary html and genes txt files are copied with ``prefix``
    prepended to their names; each path in ``ann_vcf_fpaths`` is copied
    unchanged. Relies on the module-level ``fi``, ``outdir``, ``prefix``
    and ``ann_vcf_fpaths``.
    """
    print "post_processing..."
    fi.copy_file("snpEff_summary.html",
                 "{}/{}_snpEff_summary.html".format(outdir, prefix))
    fi.copy_file("snpEff_genes.txt",
                 "{}/{}_snpEff_genes.txt".format(outdir, prefix))
    for ann_vcf_fpath in ann_vcf_fpaths:
        fi.copy_file_to_destdir(ann_vcf_fpath, outdir)
    #fi.copy_file_to_destdir(ann_csv_fname,outdir)


# Script entry point: create the shared helpers, parse the arguments, then run
# the initiate / execute / post_process stages in order.
if __name__ == '__main__':
    # Return the elements of ``searchList`` found at the positions in ``ind``.
    getVar = lambda searchList, ind: [searchList[i] for i in ind]
    fi = fileutils()
    # Rebinds the name ``misc`` to the object returned by calling it.
    misc = misc()
    get_args()
    print "\n", "Properties attributes:"
    print prop.__dict__
    # run the initiation code
    initiate()
    # execute the main part of the program
    execute()
    # post execution code
    post_process()
def get_args(): global fi global properties_file global genome_name global ref_fasta global bam_files global bam_files_str global prefix global filter_dict global default_ref_fasta_root global if_classify fi=fileutils() default_ref_fasta_root='ena_ref_fasta' filter_dict={} # Assign description to the help doc parser = argparse.ArgumentParser(description='Script for getting SNP from bam files using samtools and bcftools') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file, which including the paths of samtools and bcftools and workdir', required=True) parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name you are mapping to, only \'ch\' or \'cp\' permitted, \'ch\' stands for \'Cryptosporidium hominis\' and \'cp\' stands for \'Cryptosporidium parvum\'''', required=True) parser.add_argument('-r', '--ref_fasta', type=str, help='Please provide reference genome fasta file', required=False) parser.add_argument('-bam', '--bam_files', type=str, nargs='+', help='Please provide one or multiple bam files', required=True) parser.add_argument('-qual', '--QUAL_filter', type=str, help='To filter QUAL(phred-scaled quality score) in vcf file, please provide minimum QUAL value', required=False) parser.add_argument('-dp', '--DP_filter', type=str, help='To filter DP("Raw read depth") in vcf file, please provide the minimum DP value', required=False) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file', required=True) parser.add_argument('-c', '--classify', action='store_true',help='set classifying the result VCF file into coding and non-coding file to true', default=False) args=parser.parse_args() # check args fi.check_exist(args.properties_file) fi.check_files_exist(args.bam_files) if args.genome_name is not "ch" and not "cp": print "ERROR: Please provide the genome name you are mapping to, only \'ch\' or \'cp\' permitted" sys.exit(1) if args.ref_fasta 
is not None: fi.check_exist(args.ref_fasta) # define variables properties_file=args.properties_file genome_name=args.genome_name if args.ref_fasta is None: ref_fasta=default_ref_fasta_root+'_'+genome_name else: ref_fasta=args.ref_fasta bam_files=args.bam_files bam_files_str="" for bam_file in bam_files: bam_files_str += os.path.abspath(bam_file)+" " if args.QUAL_filter is not None: filter_dict["QUAL"]=args.QUAL_filter if args.DP_filter is not None: filter_dict["DP"]=args.DP_filter prefix=args.prefix if_classify=args.classify # print args print "properties_file:",str(properties_file) print "refrence_genome fasta file:",ref_fasta print "bam_files:",bam_files_str print "QUAL_filter:",args.QUAL_filter print "DP_filter:",args.DP_filter print "prefix:",prefix print "if_classify:",if_classify
def get_args(): global fi global prop global properties_file global genome_name global genome_fasta global bam_file_pattern global bam_request_pattern global bam_files global prefix # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script creating relocation files for multiple bam files from the same genome and automatically open the GUI.''') parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument( '-g', '--genome_name', type=str, help='''if "C. hominis" or "C. parvum" will be used as the genome, please provide "ch" for "C. hominis" or "cp" for "C. parvum"''', required=False) parser.add_argument( '-f', '--genome_fasta', type=str, help='''Please provide the directory for the genome fasta file, if "ch" or "cp" is not the genome name.''', required=False) parser.add_argument( '-b', '--bam_file_pattern', type=str, help='''Please provide the bam files' pattern with the full path''', required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) if args.genome_name is not None and not args.genome_name == "ch" and not args.genome_name == "cp": print "genome name need to be ch or cp" sys.exit(1) if args.genome_fasta is not None: fi.check_exist(args.genome_fasta) bam_file_pattern = args.bam_file_pattern bam_files = glob.glob(bam_file_pattern) bam_request_pattern = "^.*/?(.*?).bam$" for bam_file in bam_files: if not re.search(bam_request_pattern, bam_file): print "bam_file not ended with .bam" sys.exit(1) fi.check_files_exist(bam_files) # define variables properties_file = args.properties_file prop = properties(properties_file) if args.genome_name is not None: genome_name = args.genome_name if args.genome_fasta is not None: genome_fasta = args.genome_fasta else: if args.genome_name is None: print 
"If no genome_fasta provided, genome name must be provided as ch or cp." sys.exit(1) else: genome_fasta = prop.get_attrib(genome_name + "_fasta") prefix = args.prefix print "properties_file:", properties_file print "genome_name:", genome_name print "genome_fasta:", genome_fasta print "bam_file_pattern:", bam_file_pattern print "prefix:", prefix
def get_args(): global properties_file global genome global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= '''Script invests genes under selection pressure within species through dNdS. Species can be chosen from -genome_list, which including 17 genomes. They are the common genomes of protists parasite and existing in snpEff''' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-genome_list', '--genome_list', help="This will display the genome name list", action="store_true") parser.add_argument( '-g', '--genome_name', type=str, help= '''Please provide the genome name, only with those obtained from -genome_list''', required='-genome_list' not in sys.argv) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide snp vcf files' pattern with full file path", required='-genome_list' not in sys.argv) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required='-genome_list' not in sys.argv) # check args args = parser.parse_args() if args.genome_list: print get_gene_list_str() sys.exit(0) fi = fileutils() fi.check_exist(args.properties_file) properties_file = args.properties_file prop = properties(properties_file) genome = args.genome_name if genome not in get_gene_list_str().split("\n"): print "ERROR: genome_name {} not in the list of -genome_list".format( genome) sys.exit(1) vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix
def get_args(): global properties_file global genome global gff global prefix global vcf_file_pattern global prop # Assign description to the help doc parser = argparse.ArgumentParser( description= 'Script invests genes under selection pressure within species through dNdS' ) parser.add_argument('-p', '--properties_file', type=str, help='Please provide the properties file.', required=True) parser.add_argument('-g', '--genome_name', type=str, help='''Please provide the genome name, only "ch" for "C. hominis" or "cp" for "C. parvum" can be used''', required=True) parser.add_argument('-gff', '--genome_gff_file', type=str, help='''Please provide the genome gff file, only C. hominis or C. parvum gff file can be used''', required=False) parser.add_argument( '-f', '--vcf_file_pattern', type=str, help="Please provide vcf files' pattern with full file path", required=True) parser.add_argument('-pre', '--prefix', type=str, help='Please provide the prefix for the output file.', required=True) # check args args = parser.parse_args() fi = fileutils() fi.check_exist(args.properties_file) if args.genome_gff_file is not None: fi.check_exist(args.genome_gff_file) # define variables properties_file = args.properties_file prop = properties(properties_file) if args.genome_name != 'ch' and args.genome_name != 'cp': print "only 'ch' or 'cp' can be used as the genome name" sys.exit(1) else: genome = args.genome_name if args.genome_gff_file is None: gff = prop.get_attrib(genome + "_gff") vcf_file_pattern = args.vcf_file_pattern prefix = args.prefix print "properties_file:", properties_file print "genome:", genome print "genome_gff:", gff print "vcf_file_pattern:", vcf_file_pattern print "prefix:", prefix