def run_porechop(minion_reads, output_directory, threads, logfile=None):
    """
    Run the ``porechop`` command-line tool on a MinION read file.

    The output is always written to ``minION_chopped.fastq.gz`` inside
    ``output_directory``.

    :param minion_reads: Path to the input reads, passed to porechop via ``-i``.
    :param output_directory: Directory in which the chopped reads file is created.
    :param threads: Thread count, passed to porechop via ``-t``.
    :param logfile: Forwarded unchanged to ``run_cmd``.
    :return: Path to the chopped reads file.
    """
    chopped_path = os.path.join(output_directory, 'minION_chopped.fastq.gz')
    template = 'porechop -i {minion_reads} -o {chopped_reads} -t {threads}'
    fields = {
        'minion_reads': minion_reads,
        'chopped_reads': chopped_path,
        'threads': threads,
    }
    run_cmd(template.format(**fields), logfile=logfile)
    return chopped_path
def run_nanoplot(fastq_file, output_directory, threads, logfile=None):
    """
    Run the ``NanoPlot`` command-line tool on a FASTQ file, unless a report already exists.

    If ``NanoPlot-report.html`` is already present in ``output_directory`` the
    function returns immediately without running anything.

    :param fastq_file: Path to the reads, passed to NanoPlot via ``--fastq_rich``.
    :param output_directory: Directory passed to NanoPlot via ``-o``; also where the
        existing report is looked for.
    :param threads: Thread count, passed to NanoPlot via ``-t``.
    :param logfile: Forwarded unchanged to ``run_cmd``, matching the other tool
        wrappers in this file. Defaults to None.
    :return: None.
    """
    report_path = os.path.join(output_directory, 'NanoPlot-report.html')
    # A report from a previous run means there is nothing left to do.
    if os.path.isfile(report_path):
        return
    cmd = 'NanoPlot -t {threads} -o {output_directory} --fastq_rich {fastq_file}'.format(
        threads=threads,
        output_directory=output_directory,
        fastq_file=fastq_file)
    run_cmd(cmd, logfile=logfile)
def _corrected_read_name(reads_path):
    """Return the basename of ``reads_path`` with ``_corrected`` inserted before its extension."""
    name = os.path.basename(reads_path)
    if '.fastq.gz' in name:
        # Standard case: identical result to the historical naming scheme.
        return name.replace('.fastq.gz', '_corrected.fastq.gz')
    # Other extensions (.fq.gz, .fastq, ...): a plain replace would leave the name
    # unchanged, so the output would share the input's filename. Insert the tag
    # before the extension instead, keeping a compression suffix attached.
    stem, ext = os.path.splitext(name)
    if ext.lower() in ('.gz', '.bz2'):
        stem, inner_ext = os.path.splitext(stem)
        ext = inner_ext + ext
    return stem + '_corrected' + ext


def correct_illumina(forward_reads, reverse_reads, output_directory, threads, logfile=None):
    """
    Run ``tadpole.sh`` in ``mode=correct`` on a pair of Illumina read files.

    Output files are placed in ``output_directory`` and named after the inputs
    with ``_corrected`` inserted before the file extension
    (``sample_R1.fastq.gz`` -> ``sample_R1_corrected.fastq.gz``).

    :param forward_reads: Path to the forward reads, passed as ``in=``.
    :param reverse_reads: Path to the reverse reads, passed as ``in2=``.
    :param output_directory: Directory in which the corrected files are created.
    :param threads: Thread count, passed as ``threads=``.
    :param logfile: Forwarded unchanged to ``run_cmd``.
    :return: Tuple of (forward_corrected, reverse_corrected) output paths.
    """
    forward_corrected = os.path.join(output_directory, _corrected_read_name(forward_reads))
    reverse_corrected = os.path.join(output_directory, _corrected_read_name(reverse_reads))
    cmd = 'tadpole.sh in={forward_reads} in2={reverse_reads} out={forward_corrected} out2={reverse_corrected} ' \
          'mode=correct threads={threads}'.format(forward_reads=forward_reads,
                                                  reverse_reads=reverse_reads,
                                                  forward_corrected=forward_corrected,
                                                  reverse_corrected=reverse_corrected,
                                                  threads=threads)
    run_cmd(cmd, logfile=logfile)
    return forward_corrected, reverse_corrected
def _trimmed_read_name(reads_path):
    """Return the basename of ``reads_path`` with ``_trimmed`` inserted before its extension."""
    name = os.path.basename(reads_path)
    if '.fastq.gz' in name:
        # Standard case: identical result to the historical naming scheme.
        return name.replace('.fastq.gz', '_trimmed.fastq.gz')
    # Other extensions (.fq.gz, .fastq, ...): a plain replace would leave the name
    # unchanged, so the output would share the input's filename. Insert the tag
    # before the extension instead, keeping a compression suffix attached.
    stem, ext = os.path.splitext(name)
    if ext.lower() in ('.gz', '.bz2'):
        stem, inner_ext = os.path.splitext(stem)
        ext = inner_ext + ext
    return stem + '_trimmed' + ext


def trim_illumina(forward_reads, reverse_reads, output_directory, threads, logfile=None):
    """
    Run ``bbduk.sh`` on a pair of Illumina read files.

    The command is invoked with the fixed options
    ``qtrim=w trimq=10 ref=adapters minlength=50``. Output files are placed in
    ``output_directory`` and named after the inputs with ``_trimmed`` inserted
    before the file extension (``sample_R1.fastq.gz`` -> ``sample_R1_trimmed.fastq.gz``).

    :param forward_reads: Path to the forward reads, passed as ``in=``.
    :param reverse_reads: Path to the reverse reads, passed as ``in2=``.
    :param output_directory: Directory in which the trimmed files are created.
    :param threads: Thread count, passed as ``threads=``.
    :param logfile: Forwarded unchanged to ``run_cmd``.
    :return: Tuple of (forward_trimmed, reverse_trimmed) output paths.
    """
    forward_trimmed = os.path.join(output_directory, _trimmed_read_name(forward_reads))
    reverse_trimmed = os.path.join(output_directory, _trimmed_read_name(reverse_reads))
    cmd = 'bbduk.sh in={forward_reads} in2={reverse_reads} out={forward_trimmed} out2={reverse_trimmed} ' \
          'qtrim=w trimq=10 ref=adapters minlength=50 threads={threads}'.format(forward_reads=forward_reads,
                                                                                reverse_reads=reverse_reads,
                                                                                forward_trimmed=forward_trimmed,
                                                                                reverse_trimmed=reverse_trimmed,
                                                                                threads=threads)
    run_cmd(cmd, logfile=logfile)
    return forward_trimmed, reverse_trimmed
def run_unicycler(forward_reads, reverse_reads, long_reads, output_directory, threads, logfile=None, conservative=False):
    """
    Run the ``unicycler`` command-line tool with short and long reads.

    The command always includes ``--no_correct --min_fasta_length 1000 --keep 0``.

    :param forward_reads: Path to the forward short reads, passed via ``-1``.
    :param reverse_reads: Path to the reverse short reads, passed via ``-2``.
    :param long_reads: Path to the long reads, passed via ``-l``.
    :param output_directory: Output directory, passed via ``-o``.
    :param threads: Thread count, passed via ``-t``.
    :param logfile: Forwarded unchanged to ``run_cmd``.
    :param conservative: If truthy, ``--mode conservative`` is used; otherwise ``--mode normal``.
    :return: None.
    """
    mode = "conservative" if conservative else "normal"
    template = (
        'unicycler -1 {forward_reads} -2 {reverse_reads} -l {long_reads} -o {output_directory} -t {threads} '
        '--no_correct --min_fasta_length 1000 --keep 0 --mode {runmode}'
    )
    unicycler_cmd = template.format(
        forward_reads=forward_reads,
        reverse_reads=reverse_reads,
        long_reads=long_reads,
        output_directory=output_directory,
        threads=threads,
        runmode=mode,
    )
    run_cmd(unicycler_cmd, logfile=logfile)
def subsample_via_filtlong(minion_reads, illumina_forward, illumina_reverse, output_directory, target_bases=250000000, logfile=None):
    """
    Run the ``filtlong`` command-line tool and capture its output in a FASTQ file.

    The command's standard output is redirected with ``>`` to
    ``length_filtered_reads.fastq`` inside ``output_directory``.
    NOTE(review): the redirect only works if ``run_cmd`` executes the command
    through a shell - confirm against its implementation.

    :param minion_reads: Path to the long reads, given as filtlong's positional input.
    :param illumina_forward: Path to the forward Illumina reads, passed via ``-1``.
    :param illumina_reverse: Path to the reverse Illumina reads, passed via ``-2``.
    :param output_directory: Directory in which the output FASTQ is created.
    :param target_bases: Value passed via ``-t``. Defaults to 250000000.
    :param logfile: Forwarded unchanged to ``run_cmd``.
    :return: Path to the output FASTQ file.
    """
    logging.info('Targeting {} bases for read subsampling...'.format(target_bases))
    subsampled_path = os.path.join(output_directory, 'length_filtered_reads.fastq')
    template = 'filtlong -t {target_bases} -1 {illumina_forward} -2 {illumina_reverse} {minion_reads} > {filtered_reads}'
    fields = {
        'target_bases': target_bases,
        'illumina_forward': illumina_forward,
        'illumina_reverse': illumina_reverse,
        'minion_reads': minion_reads,
        'filtered_reads': subsampled_path,
    }
    run_cmd(template.format(**fields), logfile=logfile)
    return subsampled_path