def _run_minimus2(self, infile, outdir):
    '''Converts infile to an AMOS .afg file and runs minimus2 on it.

    The AMOS file is written to <outdir>/minimus2.afg by toAmos. minimus2 is
    then run on the prefix <outdir>/minimus2 with allow_fail=True, and
    whatever common.syscall returns for that call is returned unchanged.'''
    prefix = os.path.join(outdir, 'minimus2')
    afg_file = prefix + '.afg'

    # Conversion step: called with common.syscall's default arguments.
    common.syscall(' '.join(['toAmos', '-s', infile, '-o', afg_file]))

    # Assembly step: failure is allowed here, the caller inspects the result.
    return common.syscall('minimus2 ' + prefix, allow_fail=True)
def run_spades(self, stop_at_first_success=False):
    '''Runs SPAdes once per kmer in self.spades_kmers and keeps the best run.

    Each kmer is a separate run, in its own temporary directory, because
    SPAdes dies if any kmer does not work. The 'best' assembly is the one
    with the biggest N50 (the largest kmer wins a tie); its directory is
    renamed to self.outdir and all other temporary directories are deleted.

    If stop_at_first_success is True, no more kmers are tried after the
    first run that gives a non-zero N50.

    Raises Error if no run gave a non-zero N50. The temporary directories
    are left in place in that case so the SPAdes logs can be inspected.'''
    n50_by_kmer = {}
    dir_by_kmer = {}

    for kmer in self.spades_kmers:
        run_dir = tempfile.mkdtemp(
            prefix=self.outdir + '.tmp.spades.' + str(kmer) + '.',
            dir=os.getcwd(),
        )
        dir_by_kmer[kmer] = run_dir
        ok, errs = self.run_spades_once(kmer, run_dir)
        if not ok:
            continue

        # Index the contigs so that the N50 can be read from the .fai file
        fasta = os.path.join(run_dir, 'contigs.fasta')
        common.syscall(self.samtools.exe() + ' faidx ' + fasta, verbose=self.verbose)
        run_n50 = pyfastaq.tasks.stats_from_fai(fasta + '.fai')['N50']
        if run_n50 == 0:
            continue

        n50_by_kmer[kmer] = run_n50
        if stop_at_first_success:
            break

    if not n50_by_kmer:
        raise Error(
            'Error running SPAdes. Output directories are:\n '
            + '\n '.join(dir_by_kmer.values())
            + '\nThe reason why should be in the spades.log file in each directory.'
        )

    if self.verbose:
        print('[assemble]\tkmer\tN50')
        for kmer in sorted(n50_by_kmer):
            print('[assemble]', kmer, n50_by_kmer[kmer], sep='\t')

    # Biggest N50 first, then biggest kmer to break ties
    best_kmer = max(n50_by_kmer, key=lambda kmer: (n50_by_kmer[kmer], kmer))
    assert best_kmer is not None

    for kmer, run_dir in dir_by_kmer.items():
        if kmer != best_kmer:
            shutil.rmtree(run_dir)
            continue

        if self.verbose:
            print('[assemble] using assembly with kmer', kmer)
        os.rename(run_dir, self.outdir)
def bwa_index(infile, outprefix=None, bwa='bwa', verbose=False):
    '''Runs 'bwa index' on infile, unless the index files already exist.

    infile: sequence file to be indexed.
    outprefix: prefix of the index files. Defaults to infile.
    bwa: the bwa executable, as a string that is put into the command line.
    verbose: passed on to common.syscall.

    Nothing is run if <prefix>.<ext> exists for every ext in
    index_extensions.'''
    prefix = infile if outprefix is None else outprefix

    if all(os.path.exists(prefix + '.' + ext) for ext in index_extensions):
        return

    index_cmd = [bwa, 'index', '-p', prefix, infile]
    common.syscall(' '.join(index_cmd), verbose=verbose)
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    '''Runs 'bwa index' on infile, unless the index files already exist.

    infile: sequence file to be indexed.
    outprefix: prefix of the index files. Defaults to infile.
    bwa: program object with an exe() method. If None, one is made with
         external_progs.make_and_check_prog('bwa').
    verbose: passed on to make_and_check_prog and common.syscall.

    Nothing is run if <prefix>.<ext> exists for every ext in
    index_extensions.'''
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    prefix = infile if outprefix is None else outprefix
    index_files = [prefix + '.' + ext for ext in index_extensions]

    # Only (re)build the index when at least one of its files is absent
    if any(not os.path.exists(path) for path in index_files):
        index_cmd = [bwa.exe(), 'index', '-p', prefix, infile]
        common.syscall(' '.join(index_cmd), verbose=verbose)
def bwa_index(infile, outprefix=None, bwa=None, verbose=False):
    '''Makes a BWA index of infile if any of the index files is missing.

    infile: sequence file to be indexed.
    outprefix: prefix of the index files. infile is used when this is None.
    bwa: program object with an exe() method. When None, it is obtained from
         external_progs.make_and_check_prog('bwa').
    verbose: passed on to make_and_check_prog and common.syscall.'''
    if bwa is None:
        bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    if outprefix is None:
        outprefix = infile

    for extension in index_extensions:
        if not os.path.exists(outprefix + '.' + extension):
            break
    else:
        # Every expected index file was found, so there is nothing to do
        return

    args = [bwa.exe(), 'index']
    args += ['-p', outprefix]
    args.append(infile)
    common.syscall(' '.join(args), verbose=verbose)
def run_spades(self, stop_at_first_success=False):
    '''Tries SPAdes with each kmer of self.spades_kmers and keeps one result.

    Every kmer gets its own run in its own temporary directory, because
    SPAdes dies if any kmer does not work. The run with the biggest N50 is
    kept: its directory is renamed to self.outdir. On equal N50 the largest
    kmer is used. The directories of all other runs are removed.

    stop_at_first_success: if True, stop trying kmers as soon as one run
    has a non-zero N50.

    Raises Error when there is no run with a non-zero N50. Nothing is
    deleted then, and the message lists the temporary directories.'''
    successes = {}
    tmpdirs = {}

    for kmer in self.spades_kmers:
        tmp_prefix = self.outdir + '.tmp.spades.' + str(kmer) + '.'
        tmpdirs[kmer] = tempfile.mkdtemp(prefix=tmp_prefix, dir=os.getcwd())
        ok, errs = self.run_spades_once(kmer, tmpdirs[kmer])

        if ok:
            contigs = os.path.join(tmpdirs[kmer], 'contigs.fasta')
            faidx_cmd = self.samtools.exe() + ' faidx ' + contigs
            common.syscall(faidx_cmd, verbose=self.verbose)
            stats = pyfastaq.tasks.stats_from_fai(contigs + '.fai')

            if stats['N50'] != 0:
                successes[kmer] = stats['N50']

                if stop_at_first_success:
                    break

    if len(successes) == 0:
        listed_dirs = '\n '.join(tmpdirs.values())
        raise Error('Error running SPAdes. Output directories are:\n ' + listed_dirs + '\nThe reason why should be in the spades.log file in each directory.')

    if self.verbose:
        print('[assemble]\tkmer\tN50')
        for kmer in sorted(successes):
            print('[assemble]', kmer, successes[kmer], sep='\t')

    # Walk the kmers in increasing order; '>=' lets a later kmer win a tie
    winner = None
    for kmer in sorted(successes):
        if winner is None or successes[kmer] >= successes[winner]:
            winner = kmer

    assert winner is not None

    for kmer, tmpdir in tmpdirs.items():
        if kmer == winner:
            if self.verbose:
                print('[assemble] using assembly with kmer', kmer)
            os.rename(tmpdir, self.outdir)
        else:
            shutil.rmtree(tmpdir)
def run_spades_once(self, kmer, outdir):
    '''Runs SPAdes one time, with a single kmer, writing to outdir.

    self.reads is given to SPAdes with -s and self.threads with -t. The
    command is run with allow_fail=True and the result of common.syscall is
    returned as it is.'''
    spades_args = [self.spades.exe()]
    spades_args += ['-s', self.reads]
    spades_args += ['-k', str(kmer)]
    spades_args += ['--careful', '--only-assembler']
    spades_args += ['-t', str(self.threads)]
    spades_args += ['-o', outdir]
    return common.syscall(' '.join(spades_args), verbose=self.verbose, allow_fail=True)
def run_canu(self):
    '''Runs canu instead of spades.

    Canu is run in self.outdir with the prefix 'canu'. Afterwards its
    contigs are written to contigs.fasta by Assembler._rename_canu_contigs,
    and canu.contigs.gfa is renamed to contigs.gfa.

    Raises Error if the canu run is reported as not ok.'''
    canu_cmd = self._make_canu_command(self.outdir, 'canu')
    ok, errs = common.syscall(canu_cmd, verbose=self.verbose, allow_fail=False)
    if not ok:
        raise Error('Error running Canu.')

    def in_outdir(filename):
        return os.path.join(self.outdir, filename)

    Assembler._rename_canu_contigs(in_outdir('canu.contigs.fasta'), in_outdir('contigs.fasta'))
    os.rename(in_outdir('canu.contigs.gfa'), in_outdir('contigs.gfa'))
def run_canu(self):
    '''Runs canu instead of spades, then gives its output the usual names.

    The canu output files in self.outdir have the prefix 'canu'. The contigs
    FASTA is passed through Assembler._rename_canu_contigs to make
    contigs.fasta, and the GFA file is renamed to contigs.gfa.

    Raises Error if the canu run is reported as not ok.'''
    ok, errs = common.syscall(
        self._make_canu_command(self.outdir, 'canu'),
        verbose=self.verbose,
        allow_fail=False,
    )
    if not ok:
        raise Error('Error running Canu.')

    # (name made by canu, name wanted) for the two files that are kept
    fasta_old, fasta_new = [
        os.path.join(self.outdir, name)
        for name in ('canu.contigs.fasta', 'contigs.fasta')
    ]
    gfa_old, gfa_new = [
        os.path.join(self.outdir, name)
        for name in ('canu.contigs.gfa', 'contigs.gfa')
    ]

    Assembler._rename_canu_contigs(fasta_old, fasta_new)
    os.rename(gfa_old, gfa_new)
def run_canu(self):
    '''Runs canu instead of spades.

    Canu is run in self.outdir with the prefix 'canu'. Its
    canu.contigs.fasta is then copied to contigs.fasta in the same
    directory, with each header line rewritten to look like a SPAdes name:

        <name with 'tig00' replaced by 'NODE_'>_length_<value of 2nd field>
        _cov_<value of 4th field>_ID_<name with 'tig00' removed>

    where the fields are the whitespace-separated parts of the canu header
    and 'value' is the text after '='. Finally contigs.fasta is indexed with
    samtools faidx.

    Raises Error if the canu run is reported as not ok. A header line with
    fewer than four fields, or without '=' in the fields used, raises
    IndexError.'''
    cmd = self._make_canu_command(self.outdir, 'canu')
    ok, errs = common.syscall(cmd, verbose=self.verbose, allow_fail=False)
    if not ok:
        raise Error('Error running Canu.')

    canu_fasta = os.path.join(self.outdir, 'canu.contigs.fasta')
    contigs_fasta = os.path.join(self.outdir, 'contigs.fasta')

    # 'with' makes sure both files get closed, also when a header line
    # cannot be parsed and an exception is raised part way through.
    with open(canu_fasta) as f_in, open(contigs_fasta, 'w') as f_out:
        for line in f_in:
            if not line.startswith('>'):
                f_out.write(line)
                continue

            fields = line.split()
            # NOTE(review): fields[0] still has its leading '>', so the part
            # after '_ID_' also starts with '>'. Kept as it is, because the
            # contig names may be relied on elsewhere - confirm.
            new_header = ''.join([
                fields[0].replace('tig00', 'NODE_'),
                '_length_', fields[1].split('=')[1],
                '_cov_', fields[3].split('=')[1],
                '_ID_', fields[0].replace('tig00', ''),
                '\n',
            ])
            f_out.write(new_header)

    contigs_fai = contigs_fasta + '.fai'
    common.syscall(self.samtools.exe() + ' faidx ' + contigs_fasta, verbose=self.verbose)

    # The N50 from these stats was only ever put in a local variable that
    # nothing read. The call itself is kept, so that an index file that
    # cannot be parsed still fails here, as before.
    pyfastaq.tasks.stats_from_fai(contigs_fai)
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio', verbose=False, index=None):
    '''Maps reads to ref with 'bwa mem' and makes a sorted, indexed BAM file.

    ref: reference FASTA file. A temporary BWA index of it is made and
         deleted again after the mapping.
    reads: reads file given to bwa mem.
    outfile: name of the final sorted BAM file. With samtools older than 1.3
             the last four characters are cut off to get the output prefix
             for samtools sort, so the name is assumed to end in '.bam'.
    threads: threads for bwa mem. samtools sort uses at most 4 of them.
    bwa_options: extra options put into the bwa mem command line.
    verbose: passed on to the program checks and to common.syscall.
    index: not used by this function.

    Supplementary alignments (flag 0x0800) are filtered out by samtools view.
    Temporary files: <outfile>.tmp.unsorted.bam and the BWA index with
    prefix <outfile>.tmp.bwa_index.'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'

    # bwa_index calls exe() on what it gets, so it needs the program object
    # itself and not the string from bwa.exe().
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    cmd = ' '.join([
        bwa.exe(), 'mem',
        bwa_options,
        '-t', str(threads),
        tmp_index,
        reads,
        '|',
        samtools.exe(), 'view',
        '-F 0x0800',
        '-T', ref,
        '-b',
        '-o', unsorted_bam,
        '-',
    ])
    common.syscall(cmd, verbose=verbose)
    bwa_index_clean(tmp_index)

    # Sorting: no more than 4 threads, sharing 500M of memory between them
    threads = min(4, threads)
    thread_mem = int(500 / threads)

    # From samtools 1.3 onwards the output file of 'samtools sort' has to be
    # given with -o. Older versions take an output prefix without '.bam' as
    # the last argument instead.
    if samtools.version_at_least('1.3'):
        sort_output = ['-o', outfile]
    else:
        sort_output = [outfile[:-4]]

    cmd = ' '.join([
        samtools.exe(), 'sort',
        '-@', str(threads),
        '-m', str(thread_mem) + 'M',
        unsorted_bam,
    ] + sort_output)
    common.syscall(cmd, verbose=verbose)
    os.unlink(unsorted_bam)

    cmd = samtools.exe() + ' index ' + outfile
    common.syscall(cmd, verbose=verbose)
def run_spades_once(self, kmer, outdir):
    '''Runs SPAdes one time, with a single kmer, writing to outdir.

    The command line comes from self._make_spades_command. It is run with
    allow_fail=True and the result of common.syscall is returned as it is.'''
    spades_cmd = self._make_spades_command(kmer, outdir)
    result = common.syscall(spades_cmd, verbose=self.verbose, allow_fail=True)
    return result
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio', verbose=False, index=None):
    '''Maps reads to ref with 'bwa mem' and makes a sorted, indexed BAM file.

    ref: reference FASTA file. It is indexed under a temporary prefix, and
         that index is deleted again after the mapping.
    reads: reads file given to bwa mem.
    outfile: name of the final sorted BAM file.
    threads: threads for bwa mem. samtools sort uses at most 4 of them.
    bwa_options: extra options put into the bwa mem command line.
    verbose: passed on to the program checks and to common.syscall.
    index: not used by this function.'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)
    unsorted_bam = outfile + '.tmp.unsorted.bam'
    tmp_index = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=tmp_index, verbose=verbose, bwa=bwa)

    # Map the reads, and drop supplementary alignments (0x0800) on the way
    map_part = [bwa.exe(), 'mem', bwa_options, '-t', str(threads), tmp_index, reads]
    view_part = [samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b', '-o', unsorted_bam, '-']
    common.syscall(' '.join(map_part + ['|'] + view_part), verbose=verbose)
    bwa_index_clean(tmp_index)

    sort_threads = min(4, threads)
    mem_per_thread = int(500 / sort_threads)

    # The way to name the output of 'samtools sort' depends on the version:
    # from 1.3 onwards it is given with -o, before that as a prefix without
    # the last four characters of outfile. The empty string in the old case
    # keeps the command text exactly as it always was.
    if samtools.version_at_least('1.3'):
        sort_output = ['-o', outfile]
    else:
        sort_output = ['', outfile[:-4]]

    sort_part = [
        samtools.exe(), 'sort',
        '-@', str(sort_threads),
        '-m', str(mem_per_thread) + 'M',
        unsorted_bam,
    ]
    common.syscall(' '.join(sort_part + sort_output), verbose=verbose)
    os.unlink(unsorted_bam)

    common.syscall(samtools.exe() + ' index ' + outfile, verbose=verbose)
def run_racon(self):
    '''Runs minimap2, miniasm and racon instead of spades.

    The steps, with all files written to self.outdir:
      1. minimap2 overlaps of the reads against themselves -> output.paf
      2. miniasm on the reads and overlaps -> output.gfa
      3. awk and fold turn the 'S' lines of the GFA file into FASTA
         -> output.gfa.fasta
      4. two rounds of minimap2 mapping (output.gfa1.sam, output.gfa2.sam)
         and racon correction (output.racon1.fasta, output.racon2.fasta)
    At the end output.gfa is renamed to contigs.gfa and output.racon2.fasta
    to contigs.fasta.

    The minimap2 presets are the PacBio ones (ava-pb, map-pb) when
    self.data_type starts with 'pacbio-', otherwise the Nanopore ones
    (ava-ont, map-ont).

    Raises Error, naming the step, if a command is reported as not ok.'''
    import shlex

    def run_step(args, description):
        # Run one command through the shell and stop the pipeline if it fails
        ok, errs = common.syscall(' '.join(args), verbose=self.verbose, allow_fail=False)
        if not ok:
            raise Error('Error running ' + description + '.')

    def in_outdir(filename):
        return os.path.join(self.outdir, filename)

    # Both preset names are chosen in one place. Before, each branch set a
    # differently spelled variable, so one of the two data types always hit
    # a NameError.
    if self.data_type.split('-')[0] == 'pacbio':
        overlap_preset, map_preset = 'ava-pb', 'map-pb'  # PacBio
    else:
        overlap_preset, map_preset = 'ava-ont', 'map-ont'  # Nanopore

    # ' '.join needs strings, so convert the thread count once
    threads = str(self.threads)

    paf = in_outdir('output.paf')
    gfa = in_outdir('output.gfa')
    draft = in_outdir('output.gfa.fasta')

    # minimap2
    run_step([
        self.minimap2.exe(),
        '-t', threads,
        '-x', overlap_preset,
        self.reads,
        self.reads,
        '>', paf,
    ], 'minimap2')

    # miniasm
    run_step([
        self.miniasm.exe(),
        '-Rc2',
        '-f', self.reads,
        paf,
        '>', gfa,
    ], 'miniasm')

    # gfa2fasta. The awk program must reach awk as one argument, with its
    # double quotes and $2/$3 untouched by the shell, and with \n as an awk
    # escape and not a real line break: so quote it for the shell.
    run_step([
        self.awk.exe(),
        shlex.quote('/^S/{print ">"$2"\\n"$3}'),
        gfa,
        '|', 'fold',
        '>', draft,
    ], 'awk')

    # Correction rounds 1 and 2: map the reads to the current sequences,
    # then let racon correct them. The second round uses the same preset as
    # the first one instead of a fixed 'map-pb'.
    for round_number in ('1', '2'):
        sam = in_outdir('output.gfa' + round_number + '.sam')
        corrected = in_outdir('output.racon' + round_number + '.fasta')

        run_step([
            self.minimap2.exe(),
            '-t', threads,
            '-ax', map_preset,
            draft,
            self.reads,
            '>', sam,
        ], 'minimap2 correction step #' + round_number)

        run_step([
            self.racon.exe(),
            '-t', threads,
            self.reads,
            sam,
            draft,
            '>', corrected,
        ], 'racon correction step #' + round_number)

        draft = corrected

    os.rename(gfa, in_outdir('contigs.gfa'))
    os.rename(draft, in_outdir('contigs.fasta'))
def bwa_mem(ref, reads, outfile, threads=1, bwa_options='-x pacbio', verbose=False, index=None):
    '''Runs 'bwa mem' of reads against ref, then sorts and indexes the BAM.

    ref: reference FASTA file. Its BWA index is made under the prefix
         <outfile>.tmp.bwa_index and cleaned up after the mapping.
    reads: reads file given to bwa mem.
    outfile: name of the final sorted BAM file.
    threads: threads for bwa mem. samtools sort gets min(4, threads).
    bwa_options: extra options put into the bwa mem command line.
    verbose: passed on to the program checks and to common.syscall.
    index: not used by this function.'''
    samtools = external_progs.make_and_check_prog('samtools', verbose=verbose)
    bwa = external_progs.make_and_check_prog('bwa', verbose=verbose)

    tmp_bam = outfile + '.tmp.unsorted.bam'
    index_prefix = outfile + '.tmp.bwa_index'
    bwa_index(ref, outprefix=index_prefix, verbose=verbose, bwa=bwa)

    # bwa mem is piped into samtools view, which leaves out supplementary
    # alignments (flag 0x0800) and writes the unsorted BAM file
    pipeline = [bwa.exe(), 'mem', bwa_options, '-t', str(threads), index_prefix, reads]
    pipeline.append('|')
    pipeline.extend([samtools.exe(), 'view', '-F 0x0800', '-T', ref, '-b', '-o', tmp_bam, '-'])
    common.syscall(' '.join(pipeline), verbose=verbose)
    bwa_index_clean(index_prefix)

    threads = min(4, threads)
    thread_mem = int(500 / threads)

    # samtools 1.3 and later: output file is given with -o.
    # Earlier versions: an output prefix, which is outfile without its last
    # four characters, and no flag in front of it (hence the empty string).
    new_style_sort = samtools.version_at_least('1.3')
    out_flag = '-o' if new_style_sort else ''
    out_name = outfile if new_style_sort else outfile[:-4]

    sort_args = [samtools.exe(), 'sort']
    sort_args += ['-@', str(threads)]
    sort_args += ['-m', '{}M'.format(thread_mem)]
    sort_args += [tmp_bam, out_flag, out_name]
    common.syscall(' '.join(sort_args), verbose=verbose)
    os.unlink(tmp_bam)

    index_cmd = ' '.join([samtools.exe() + ' index', outfile])
    common.syscall(index_cmd, verbose=verbose)