def check_gff_extension(self):
    """Abort unless the annotation file has a supported extension.

    Reads ``self.gff_extension``.  When it is ``'gff'`` or ``'gtf'`` the
    method returns ``None``.  Otherwise a message naming ``self.args.gff``
    is printed and the process exits with status 1.
    """
    if self.gff_extension in ('gff', 'gtf'):
        return

    template = "!!WARNING!! {}'s extension is not 'gff' or 'gtf'\n"
    print(time_stamp(), template.format(self.args.gff), flush=True)
    sys.exit(1)
def run_prinseq(self):
    """Run prinseq-lite.pl over the split FaQCs FASTQ parts via xargs.

    ``seq`` generates zero-padded part numbers (001 .. ``self.N_threads``)
    and ``xargs -P`` launches one ``prinseq-lite.pl`` per part, with up to
    ``self.N_threads`` running at once.  Good reads are written under
    ``<out>/20_fastq/prinseq_<index>/`` and the pipeline output goes to
    ``<out>/log/prinseq_<index>.log``.

    On a non-zero exit status the command line is printed and the process
    exits with status 1.
    """
    # NOTE(review): the -fastq2 input is also read from the
    # "<index>.1.trimmed.fastq.split" directory -- confirm that this is
    # where the splitting step writes the second mate.
    template = 'seq -f %03g {0} | \
                xargs -P {0} \
                -I % \
                prinseq-lite.pl -trim_left 5 \
                -trim_right 20 \
                -trim_qual_window 10 \
                -trim_qual_right 20 \
                -min_len 75 \
                -min_qual_mean 20 \
                -fastq {1}/20_fastq/FaQCs_{2}/{2}.1.trimmed.fastq.split/{2}.1.trimmed.part_%.fastq \
                -fastq2 {1}/20_fastq/FaQCs_{2}/{2}.1.trimmed.fastq.split/{2}.2.trimmed.part_%.fastq \
                -out_good {1}/20_fastq/prinseq_{2}/{2}.part_% \
                -out_bad null \
                > {1}/log/prinseq_{2}.log \
                2>&1'
    cmd = template.format(self.N_threads, self.args.out, self.index)

    run_options = dict(stdout=sbp.DEVNULL,
                       stderr=sbp.DEVNULL,
                       shell=True,
                       check=True)
    try:
        sbp.run(cmd, **run_options)
    except sbp.CalledProcessError:
        print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
        sys.exit(1)
def alignment(self):
    """Dispatch every RNA-seq and whole-genome FASTQ pair to ``Alignment.run``.

    Each entry of ``self.args.rna_seq`` and ``self.args.whole_genome`` is a
    comma-separated pair ``"<fastq1>,<fastq2>"``.  For every pair a
    tab-separated job string is built:

        fastq1, fastq2, prefix, library type ('RNA' or 'Genome'), threads

    ``get_proc_numbers`` supplies the number of worker processes and the
    per-job thread counts.  The jobs are mapped over a process pool.

    Raises:
        IndexError: if an entry does not contain a comma-separated pair.
    """
    # Use the options stored on this instance, like the rest of the method,
    # instead of depending on a module-level ``args`` being defined.
    aln = Alignment(self.args)

    N_files = len(self.args.rna_seq) + len(self.args.whole_genome)
    N_process, each_threads = get_proc_numbers(self.args.threads, N_files)

    # (input list, prefix template, library type); RNA-seq jobs come first.
    libraries = (
        (self.args.rna_seq, 'RNA-seq.0{:0>3}', 'RNA'),
        (self.args.whole_genome, 'WGS.0{:0>3}', 'Genome'),
    )

    aln_args = []
    for fastq_pairs, prefix_template, library_type in libraries:
        for i, fastq in enumerate(fastq_pairs):
            mates = fastq.split(',')
            fastq1 = mates[0]
            fastq2 = mates[1]
            index = prefix_template.format(i)
            aln_args.append('\t'.join([fastq1, fastq2, index, library_type]))
            for path in (fastq1, fastq2):
                print(time_stamp(),
                      "{}'s prefix -> {}.".format(path, index),
                      flush=True)

    # Append the thread budget of each job as the last field.
    aln_args = [job + '\t' + str(each_threads[i])
                for i, job in enumerate(aln_args)]

    pool = Pool(N_process)
    try:
        pool.map(aln.run, aln_args)
    finally:
        # Release the workers even when a job raises.
        pool.close()
        pool.join()

    print(time_stamp(), 'alignment successfully finished.', flush=True)
def run(self):
    """Announce the annotation stage and call its steps in order.

    Order: ``merge_bam``, ``transciptome_assembly``, ``make_gff``,
    ``run_gffread``.
    """
    print(time_stamp(), 'start to annotate the reference genome.', flush=True)

    # Each method is looked up right before it is called, so the order and
    # timing match a plain sequence of calls.  'transciptome_assembly' is
    # the method's actual spelling.
    steps = ('merge_bam', 'transciptome_assembly', 'make_gff', 'run_gffread')
    for step in steps:
        getattr(self, step)()
def run(self):
    """Call variants per sequence name in parallel, then merge and index.

    ``self.get_header()`` provides the names passed to ``self.mpileup``,
    which is mapped over a pool of ``self.args.threads`` processes.  After
    the pool finishes, ``self.concat()`` and ``self.mkindex()`` are called.
    """
    print(time_stamp(), 'start to call variants.', flush=True)

    chr_names = self.get_header()

    pool = Pool(self.args.threads)
    try:
        pool.map(self.mpileup, chr_names)
    finally:
        # Close and join even when a worker raises, so the pool's processes
        # are not left behind.
        pool.close()
        pool.join()

    self.concat()
    self.mkindex()
def __init__(self, args):
    """Store the options, report the run mode and create output folders.

    Args:
        args: option object; this method reads ``disable_RNAseq_trim``,
            ``disable_WGS_trim`` and ``out`` from it.

    ``20_fastq`` and ``30_bam`` are created under ``args.out``, then
    ``self.write_json()`` is called, then ``40_bed/`` is created.
    ``os.mkdir`` is used, so an already existing directory raises.
    """
    self.args = args
    opts = self.args

    if opts.disable_RNAseq_trim:
        print(time_stamp(), 'disable the trimming of RNA-seq.', flush=True)
    if opts.disable_WGS_trim:
        print(time_stamp(), 'disable the trimming of WGS.', flush=True)

    # Trimming is skipped entirely only when both libraries opt out.
    trimming = not (opts.disable_RNAseq_trim and opts.disable_WGS_trim)
    if trimming:
        banner = 'start to trim and align reads.'
    else:
        banner = 'start to align reads.'
    print(time_stamp(), banner, flush=True)

    for subdir in ('20_fastq', '30_bam'):
        os.mkdir('{}/{}'.format(self.args.out, subdir))

    self.write_json()

    os.mkdir('{0}/40_bed/'.format(self.args.out))
def run(self):
    """Index the reference FASTA with hisat2-build and samtools faidx.

    Both commands redirect their output to files under ``<out>/log/``.
    If either command exits non-zero, ``call_log`` is called with the
    output directory, a label and the command, and the process exits with
    status 1.
    """
    print(time_stamp(), 'start to index reference fasta.', flush=True)

    build_cmd = 'hisat2-build -p {0} {1} {1} \
                 > {2}/log/hisat2-build.log \
                 2>&1'.format(self.args.threads,
                              self.args.ref,
                              self.args.out)
    faidx_cmd = 'samtools faidx {} \
                 > {}/log/samtools_faidx.log \
                 2>&1'.format(self.args.ref, self.args.out)

    # Both commands are prepared before anything is executed.
    jobs = [
        ('hisat2-build', clean_cmd(build_cmd)),
        ('samtools_faidx', clean_cmd(faidx_cmd)),
    ]

    print(time_stamp(), 'hisat2-build...', flush=True)

    for label, cmd in jobs:
        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            call_log(self.args.out, label, cmd)
            sys.exit(1)

    print(time_stamp(),
          'indexing of the reference genome successfully finished.',
          flush=True)
def check_max_threads(self, args):
    """Clamp ``args.threads`` to the number of CPUs reported by the system.

    Args:
        args: option object with an integer ``threads`` attribute; it is
            modified in place.

    Returns:
        The same ``args`` object.  ``threads`` is set to the CPU count when
        the request exceeds it (with a warning on stderr) or is below 1
        (silently).  A request equal to the CPU count is left untouched.
    """
    max_cpu = multi.cpu_count()
    print(
        time_stamp(),
        'maximum number of threads which you can use is up to {}.'.format(
            max_cpu),
        flush=True)

    # Strict comparison: asking for exactly ``max_cpu`` threads is valid
    # and must not trigger the warning.
    if args.threads > max_cpu:
        sys.stderr.write(
            ('!!WARNING!! You can use up to {0} threads. '
             'This program will use {0} threads.\n').format(max_cpu))
        sys.stderr.flush()
        args.threads = max_cpu
    elif args.threads < 1:
        args.threads = max_cpu

    return args
def remove_duplicates(self):
    """Merge the per-source candidate gene files into one de-duplicated file.

    Concatenates ``<out>/candidate_genes_from_*.<ext>``, keeps columns 1-9
    and writes the unique sorted lines to
    ``<out>/all_candidate_genes.<ext>``, where ``<ext>`` is
    ``self.gff_extension``.  On a non-zero exit status the command is
    printed and the process exits with status 1.
    """
    template = 'cat {0}/candidate_genes_from_*.{1} | \
                cut -f 1-9 | \
                sort -u > {0}/all_candidate_genes.{1}'
    cmd = clean_cmd(template.format(self.args.out, self.gff_extension))

    run_options = dict(stdout=sbp.DEVNULL,
                       stderr=sbp.DEVNULL,
                       shell=True,
                       check=True)
    try:
        sbp.run(cmd, **run_options)
    except sbp.CalledProcessError:
        print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
        sys.exit(1)
def filter_candidates(self):
    """Run the ``jiji`` command on the pipeline outputs and collect its logs.

    ``jiji`` is given the annotation GFF (``-a``), the BED directory
    (``-b``), the VCF (``-v``) and the result directory (``-o``), all under
    ``self.args.out``.  On a non-zero exit status the command is printed and
    the process exits with status 1.  Afterwards the two bedtools log files
    are moved from ``70_result`` into ``log/``.
    """
    template = 'jiji -a {0}/50_annotation/annotation.gff \
                -b {0}/40_bed \
                -v {0}/60_vcf/raiden.vcf.gz \
                -o {0}/70_result'
    cmd = clean_cmd(template.format(self.args.out))

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
        sys.exit(1)

    for log_name in ('jiji_PA_bedtools.log', 'jiji_mut_bedtools.log'):
        source = '{0}/70_result/{1}'.format(self.args.out, log_name)
        destination = '{0}/log/'.format(self.args.out)
        shutil.move(source, destination)
def gzip_prinseq(self):
    """Run ``pigz`` on the four merged prinseq FASTQ files of this sample.

    The files are ``<index>_1``, ``<index>_2``, ``<index>_1_singletons`` and
    ``<index>_2_singletons`` (``.fastq``) under
    ``<out>/20_fastq/prinseq_<index>/``.  They are processed in that order
    with ``self.N_threads`` threads each.  The first failing command is
    printed and the process exits with status 1.
    """
    template = 'pigz -p {0} \
                {1}/20_fastq/prinseq_{2}/{2}{3}.fastq'
    suffixes = ('_1', '_2', '_1_singletons', '_2_singletons')

    # All commands are prepared before the first one is executed.
    commands = [
        clean_cmd(template.format(self.N_threads,
                                  self.args.out,
                                  self.index,
                                  suffix))
        for suffix in suffixes
    ]

    for cmd in commands:
        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
            sys.exit(1)
def gzip_FaQCs(self):
    """Run ``pigz`` on whichever FaQCs output files exist for this sample.

    The candidates are the ``1``, ``2``, ``discard`` and ``unpaired``
    ``.trimmed.fastq`` files under ``<out>/20_fastq/FaQCs_<index>/``.  A
    file that is absent is skipped.  The first failing command is printed
    and the process exits with status 1.
    """
    labels = ('1', '2', 'discard', 'unpaired')
    cmd_template = 'pigz -p {0} \
                    {1}/20_fastq/FaQCs_{2}/{2}.{3}.trimmed.fastq'
    path_template = '{0}/20_fastq/FaQCs_{1}/{1}.{2}.trimmed.fastq'

    # All commands are prepared up front; existence is checked per file
    # right before its command would run.
    commands = [
        clean_cmd(cmd_template.format(self.N_threads,
                                      self.args.out,
                                      self.index,
                                      label))
        for label in labels
    ]

    for label, cmd in zip(labels, commands):
        target = path_template.format(self.args.out, self.index, label)
        if not os.path.isfile(target):
            continue
        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
            sys.exit(1)
def main():
    """Run the Jiji filtering step and report its start and end.

    ``args`` is not a parameter; it is taken from the enclosing scope.
    """
    print(time_stamp(), 'start to filter the causal genes.', flush=True)

    jiji = Jiji(args)
    jiji.run()

    print(time_stamp(),
          'Filtering process successfully finished.\n',
          flush=True)
def main():
    """Run the RaIDeN pipeline and report its start and end.

    ``args`` is not a parameter; it is taken from the enclosing scope.
    """
    print(time_stamp(), 'start to run RaIDeN.', flush=True)

    pipeline = RaIDeN(args)
    pipeline.run()

    print(time_stamp(), 'RaIDeN successfully finished.\n', flush=True)
def merge_fastq(self):
    """Concatenate the per-part prinseq outputs into one file per category.

    For each of ``_1``, ``_2``, ``_1_singletons`` and ``_2_singletons``, the
    files ``<index>.part_*<suffix>.fastq`` under
    ``<out>/20_fastq/prinseq_<index>/`` are concatenated with ``cat`` into
    ``<index><suffix>.fastq`` in the same directory.  The first failing
    command is printed and the process exits with status 1.
    """
    template = 'cat {0}/20_fastq/prinseq_{1}/{1}.part_*{2}.fastq \
                > {0}/20_fastq/prinseq_{1}/{1}{2}.fastq'
    suffixes = ('_1', '_2', '_1_singletons', '_2_singletons')

    # All commands are prepared before the first one is executed.
    commands = [
        clean_cmd(template.format(self.args.out, self.index, suffix))
        for suffix in suffixes
    ]

    for cmd in commands:
        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            print(time_stamp(), '!!ERROR!! {}\n'.format(cmd), flush=True)
            sys.exit(1)