"""Assemble transcript using stringtie.""" import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [ Command( "stringtie -G {gene_annotation_path} {input_fpath} -o stringtie.gtf"), ] OUTPUTS = ["stringtie.gtf"] class StringtieAssembleTranscripts(SmartTool): def pre_run(self, identifier): input_fpath = self.input_dataset.item_content_abspath(identifier) self.base_command_props.update({'input_fpath': input_fpath}) self.base_command_props['gene_annotation_path'] = os.environ[ 'GENE_ANNOTATION_PATH'] # NOQA def main(): args = parse_args() with StringtieAssembleTranscripts(args.input_uri, args.output_uri) as smart_tool: smart_tool.base_commands = BASE_COMMANDS smart_tool.outputs = OUTPUTS smart_tool(args.identifier)
"""Run hisat2.""" import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [ Command( "hisat2 --dta -x {reference_prefix} -1 {forward_read_fpath} -2 {reverse_read_fpath} -S OUT.sam" ), # NOQA Command("samtools sort OUT.sam -o OUT.bam"), ] OUTPUTS = [ 'OUT.bam', ] def find_paired_read(dataset, identifier): pair_id = dataset.get_overlay('pair_id') return pair_id[identifier] class AlignSeqsHisat2(SmartTool): def pre_run(self, identifier): self.base_command_props['reference_prefix'] = os.environ[ 'HISAT2_REFERENCE'] # NOQA self.base_command_props[ 'forward_read_fpath'] = self.input_dataset.item_content_abspath( identifier) # NOQA
import os

from smarttoolbase import SmartTool, Command, parse_args

# Step 1 writes the samtools pileup to intermediate.vcf; step 2 runs varscan
# on it.  NOTE(review): the second ``Command`` argument presumably names the
# file that receives the command's stdout -- confirm against smarttoolbase.
BASE_COMMANDS = [
    Command("samtools mpileup -f {reference_path} {input_fpath} -o intermediate.vcf"),  # NOQA
    Command(
        "varscan mpileup2snp intermediate.vcf --output-vcf",
        "varscan.vcf",
    ),
]

# Only the final varscan result is listed as an output.
OUTPUTS = ["varscan.vcf"]


class VarScan(SmartTool):
    """Smart tool wrapping the samtools/varscan commands above."""

    def pre_run(self, identifier):
        """Store the values for the command placeholders.

        :param identifier: identifier of an item in the input dataset
        :raises KeyError: if ``REFERENCE_PATH`` is missing from the
                          environment
        """
        # The item path is stored first, the reference path second.
        self.base_command_props['input_fpath'] = (
            self.input_dataset.item_content_abspath(identifier)
        )
        self.base_command_props['reference_path'] = os.environ[
            'REFERENCE_PATH'
        ]


def main():
    """Run the tool on the item selected by the parsed arguments."""
    cli = parse_args()
    runner = VarScan(cli.input_uri, cli.output_uri)
    with runner as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(cli.identifier)
"""Run bowtie2.""" import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [ Command( "bowtie2 -x {reference_prefix} -1 {forward_read_fpath} -2 {reverse_read_fpath} -S {output_fpath}" ), # NOQA Command("samtools view -bS OUT.sam -o OUT.bam"), Command("samtools sort OUT.bam -o OUT.sorted.bam"), Command("samtools index OUT.sorted.bam OUT.sorted.bai"), ] OUTPUTS = [ 'OUT.sorted.bam', 'OUT.sorted.bai', ] def find_paired_read(dataset, identifier): pair_id = dataset.get_overlay('pair_id') return pair_id[identifier] class AlignSeqsBowtie2(SmartTool): def pre_run(self, identifier): self.base_command_props['reference_prefix'] = os.environ[ 'BOWTIE2_REFERENCE'] # NOQA
"""Quality control of sequencing data using fastqc.""" import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [Command('fastqc -o {working_directory} {input_fpath}')] class FastQC(SmartTool): def pre_run(self, identifier): input_fpath = self.input_dataset.item_content_abspath(identifier) self.base_command_props.update({ 'input_fpath': input_fpath, 'working_directory': self.working_directory, }) def stage_outputs(self, identifier): for fname in os.listdir(self.working_directory): print(fname) fpath = os.path.join(self.working_directory, fname) out_id = self.output_proto_dataset.put_item(fpath, fname) self.output_proto_dataset.add_item_metadata( out_id, 'from', "{}/{}".format(self.input_dataset.uri, identifier)) def main():
import os

from smarttoolbase import SmartTool, Command, parse_args

# Directory holding this module (symlinks resolved) and the analysis script
# located next to it.
_HERE = os.path.dirname(os.path.realpath(__file__))
_SCRIPT = os.path.join(_HERE, "analysis.py")

# Runs: python <analysis.py> {input_fpath} .
BASE_COMMANDS = [
    Command(" ".join(["python", _SCRIPT, "{input_fpath}", "."])),
]

# Image files listed as outputs of the analysis command.
OUTPUTS = [
    "enhanced_annotated_channel_0.png",
    "enhanced_annotated_channel_1.png",
    "dapi_channel_2.png",
]


class RnaFish3DTool(SmartTool):
    """Smart tool wrapping the ``analysis.py`` command above."""

    def pre_run(self, identifier):
        """Store the path of the input item for the command placeholder.

        :param identifier: identifier of an item in the input dataset
        """
        self.base_command_props['input_fpath'] = (
            self.input_dataset.item_content_abspath(identifier)
        )


def main():
    """Run the tool on the item selected by the parsed arguments."""
    cli = parse_args()
    runner = RnaFish3DTool(cli.input_uri, cli.output_uri)
    with runner as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(cli.identifier)
The trimmers are specified using the TRIMMOMATIC_TRIMMERS environment variable,
e.g.:

    export TRIMMOMATIC_TRIMMERS="ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
"""

import os

from smarttoolbase import SmartTool, Command, parse_args
from dtoolcore.utils import generate_identifier

# Paired-end sickle run; the two trimmed read files are kept as outputs and
# the singles file (trash.fq) is not listed in OUTPUTS.
# NOTE(review): the docstring text above and the class name below refer to
# trimmomatic, while this command runs sickle -- confirm which is intended.
BASE_COMMANDS = [
    Command(
        "sickle pe -t sanger -f {forward_read_fpath} -r {reverse_read_fpath} -o sickled_1.fq -p sickled_2.fq -s trash.fq"
    )  # NOQA
]

# Files named by the -o and -p options of the command above.
OUTPUTS = [
    'sickled_1.fq',
    'sickled_2.fq',
]


def find_paired_read(dataset, identifier):
    """Return the ``pair_id`` overlay entry for *identifier*.

    :param dataset: object providing ``get_overlay``
    :param identifier: key to look up in the overlay
    :returns: the value stored under *identifier* in the overlay
    """
    pair_id = dataset.get_overlay('pair_id')
    return pair_id[identifier]


class TrimSeqsTrimmomatic(SmartTool):
The trimmers are specified using the TRIMMOMATIC_TRIMMERS environment variable,
e.g.:

    export TRIMMOMATIC_TRIMMERS="ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
"""

import os

from smarttoolbase import SmartTool, Command, parse_args
from dtoolcore.utils import generate_identifier

# Paired-end trimmomatic run.  The {trimmers} placeholder is presumably
# filled from TRIMMOMATIC_TRIMMERS as described in the docstring above; the
# code doing so is not visible here -- confirm.
BASE_COMMANDS = [
    Command(
        "trimmomatic PE {forward_read_fpath} {reverse_read_fpath} read_1.fq.gz read_2.fq.gz {trimmers}"
    ),  # NOQA
]

# The two read files named on the trimmomatic command line.
OUTPUTS = [
    'read_1.fq.gz',
    'read_2.fq.gz',
]


def find_paired_read(dataset, identifier):
    """Return the ``pair_id`` overlay entry for *identifier*.

    :param dataset: object providing ``get_overlay``
    :param identifier: key to look up in the overlay
    :returns: the value stored under *identifier* in the overlay
    """
    pair_id = dataset.get_overlay('pair_id')
    return pair_id[identifier]


class TrimSeqsTrimmomatic(SmartTool):
"""Estimate transcript abundances using stringtie.""" import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [ Command( "stringtie -e -B -G {merged_stringtie_gene_annotation_path} {input_fpath} -o OUT.gtf" ), ] OUTPUTS = [ "OUT.gtf", "e2t.ctab", "e_data.ctab", "i2t.ctab", "i_data.ctab", "t_data.ctab", ] class StringtieEstimateTranscriptAbundancies(SmartTool): def pre_run(self, identifier): input_fpath = self.input_dataset.item_content_abspath(identifier) self.base_command_props.update({'input_fpath': input_fpath}) self.base_command_props[ 'merged_stringtie_gene_annotation_path'] = os.environ[ 'MERGED_STRINGTIE_GENE_ANNOTATION_PATH'] # NOQA
"""Merge transcript using stringtie. This scripts merges all the files in the input dataset give any identifier in the input dataset. It is up to the agent calling this script to ensure that this smarttool is not called once for every item in the input dataset. """ import os from smarttoolbase import SmartTool, Command, parse_args BASE_COMMANDS = [ Command( "stringtie --merge -G {gene_annotation_path} {mergelist_fpath} -o stringtie.gtf" ), ] OUTPUTS = ["stringtie.gtf"] class MergeTranscriptsStringtie(SmartTool): def pre_run(self, identifier): mergelist_fpath = os.path.join(self.working_directory, "mergelist.txt") with open(mergelist_fpath, "w") as fh: for i in self.input_dataset.identifiers: item_fpath = self.input_dataset.item_content_abspath(i) fh.write("{}\n".format(item_fpath)) self.base_command_props.update({'mergelist_fpath': mergelist_fpath}) self.base_command_props['gene_annotation_path'] = os.environ[
from smarttoolbase import SmartTool, Command, parse_args

# Prints the first four lines of the input item.
# NOTE(review): the second ``Command`` argument presumably names the file
# that receives the command's stdout -- confirm against smarttoolbase.
BASE_COMMANDS = [
    Command('head -n 4 {input_fpath}', "stdout.txt"),
]

# The file named as the second ``Command`` argument above.
OUTPUTS = ["stdout.txt"]


class SimpleExampleTool(SmartTool):
    """Minimal smart tool wrapping the ``head`` command above."""

    def pre_run(self, identifier):
        """Store the path of the input item for the command placeholder.

        :param identifier: identifier of an item in the input dataset
        """
        self.base_command_props['input_fpath'] = (
            self.input_dataset.item_content_abspath(identifier)
        )


def main():
    """Run the tool on the item selected by the parsed arguments."""
    cli = parse_args()
    runner = SimpleExampleTool(cli.input_uri, cli.output_uri)
    with runner as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(cli.identifier)


if __name__ == '__main__':
    main()