示例#1
0
"""Assemble transcript using stringtie."""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Command template for the stringtie run. The {gene_annotation_path} and
# {input_fpath} placeholders match the keys that ``pre_run`` stores in
# ``base_command_props``.
BASE_COMMANDS = [
    Command("stringtie -G {gene_annotation_path} {input_fpath} "
            "-o stringtie.gtf"),
]

# File named by the ``-o`` option of the command above.
OUTPUTS = [
    "stringtie.gtf",
]


class StringtieAssembleTranscripts(SmartTool):
    """Smart tool supplying the values used by the stringtie command."""

    def pre_run(self, identifier):
        """Store the command properties for the item *identifier*.

        Sets ``input_fpath`` to the absolute path of the item's content and
        ``gene_annotation_path`` to the value of the GENE_ANNOTATION_PATH
        environment variable.

        Raises:
            KeyError: if GENE_ANNOTATION_PATH is not set in the environment.
        """
        item_fpath = self.input_dataset.item_content_abspath(identifier)

        props = self.base_command_props
        props['input_fpath'] = item_fpath
        props['gene_annotation_path'] = os.environ['GENE_ANNOTATION_PATH']


def main():
    """Parse the command line arguments and run the tool on one item."""
    cli = parse_args()

    tool_context = StringtieAssembleTranscripts(cli.input_uri, cli.output_uri)
    with tool_context as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(cli.identifier)
示例#2
0
"""Run hisat2."""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Two commands listed in order: hisat2 writes OUT.sam, which the samtools
# sort command then reads to produce OUT.bam.
BASE_COMMANDS = [
    Command("hisat2 --dta -x {reference_prefix} "
            "-1 {forward_read_fpath} -2 {reverse_read_fpath} "
            "-S OUT.sam"),
    Command("samtools sort OUT.sam -o OUT.bam"),
]

# File named by the ``-o`` option of the samtools sort command.
OUTPUTS = ['OUT.bam']


def find_paired_read(dataset, identifier):
    """Return the entry stored for *identifier* in the 'pair_id' overlay.

    Raises whatever the overlay's item lookup raises when *identifier* is
    absent (``KeyError`` for a plain dict).
    """
    return dataset.get_overlay('pair_id')[identifier]


class AlignSeqsHisat2(SmartTool):
    def pre_run(self, identifier):

        self.base_command_props['reference_prefix'] = os.environ[
            'HISAT2_REFERENCE']  # NOQA
        self.base_command_props[
            'forward_read_fpath'] = self.input_dataset.item_content_abspath(
                identifier)  # NOQA
示例#3
0
import os

from smarttoolbase import SmartTool, Command, parse_args

# Two commands listed in order: samtools mpileup writes intermediate.vcf,
# which the varscan command then reads.
# NOTE(review): the second positional argument to Command ("varscan.vcf") is
# presumably the file the command's stdout is written to -- confirm against
# smarttoolbase.Command.
BASE_COMMANDS = [
    Command("samtools mpileup -f {reference_path} {input_fpath} "
            "-o intermediate.vcf"),
    Command(
        "varscan mpileup2snp intermediate.vcf --output-vcf",
        "varscan.vcf",
    ),
]

OUTPUTS = [
    "varscan.vcf",
]


class VarScan(SmartTool):
    """Smart tool supplying the values used by the mpileup/varscan commands."""

    def pre_run(self, identifier):
        """Store the command properties for the item *identifier*.

        Sets ``input_fpath`` to the absolute path of the item's content and
        ``reference_path`` to the value of the REFERENCE_PATH environment
        variable.

        Raises:
            KeyError: if REFERENCE_PATH is not set in the environment.
        """
        item_fpath = self.input_dataset.item_content_abspath(identifier)

        props = self.base_command_props
        props['input_fpath'] = item_fpath
        props['reference_path'] = os.environ['REFERENCE_PATH']


def main():
    """Parse the command line arguments and run VarScan on one item."""
    options = parse_args()
    input_uri, output_uri = options.input_uri, options.output_uri

    with VarScan(input_uri, output_uri) as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(options.identifier)
示例#4
0
"""Run bowtie2."""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Alignment followed by conversion, sorting and indexing of the result.
# NOTE(review): the bowtie2 command writes to {output_fpath} while the
# samtools commands below read OUT.sam -- confirm that pre_run sets
# ``output_fpath`` so that the two agree.
BASE_COMMANDS = [
    Command("bowtie2 -x {reference_prefix} "
            "-1 {forward_read_fpath} -2 {reverse_read_fpath} "
            "-S {output_fpath}"),
    Command("samtools view -bS OUT.sam -o OUT.bam"),
    Command("samtools sort OUT.bam -o OUT.sorted.bam"),
    Command("samtools index OUT.sorted.bam OUT.sorted.bai"),
]

# Sorted alignment and its index, as named in the last two commands.
OUTPUTS = ['OUT.sorted.bam', 'OUT.sorted.bai']


def find_paired_read(dataset, identifier):
    """Look up *identifier* in the dataset's 'pair_id' overlay.

    Returns the value the overlay holds for *identifier*.
    """
    overlay = dataset.get_overlay('pair_id')
    paired = overlay[identifier]
    return paired


class AlignSeqsBowtie2(SmartTool):
    def pre_run(self, identifier):

        self.base_command_props['reference_prefix'] = os.environ[
            'BOWTIE2_REFERENCE']  # NOQA
示例#5
0
"""Quality control of sequencing data using fastqc."""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Single fastqc invocation; {working_directory} and {input_fpath} match the
# keys that ``pre_run`` stores in ``base_command_props``.
BASE_COMMANDS = [
    Command('fastqc -o {working_directory} {input_fpath}'),
]


class FastQC(SmartTool):
    """Smart tool that runs fastqc and stores everything it produces."""

    def pre_run(self, identifier):
        """Store the command properties for the item *identifier*.

        Sets ``input_fpath`` to the absolute path of the item's content and
        ``working_directory`` to this tool's working directory.
        """
        item_fpath = self.input_dataset.item_content_abspath(identifier)

        command_props = {
            'input_fpath': item_fpath,
            'working_directory': self.working_directory,
        }
        self.base_command_props.update(command_props)

    def stage_outputs(self, identifier):
        """Add every entry of the working directory to the output dataset.

        Each entry is printed, put into the output proto dataset under its
        own name, and tagged with 'from' metadata of the form
        ``<input dataset uri>/<identifier>``.
        """
        for entry in os.listdir(self.working_directory):
            print(entry)
            entry_path = os.path.join(self.working_directory, entry)
            item_id = self.output_proto_dataset.put_item(entry_path, entry)
            origin = "{}/{}".format(self.input_dataset.uri, identifier)
            self.output_proto_dataset.add_item_metadata(
                item_id, 'from', origin)


def main():
示例#6
0
import os

from smarttoolbase import SmartTool, Command, parse_args

# The analysis script is located next to this file (symlinks resolved).
_HERE = os.path.dirname(os.path.realpath(__file__))
_SCRIPT = os.path.join(_HERE, "analysis.py")

# Runs the analysis script on the input file with "." as second argument.
BASE_COMMANDS = [
    Command("".join(("python ", _SCRIPT, " {input_fpath} ."))),
]

OUTPUTS = [
    "enhanced_annotated_channel_0.png",
    "enhanced_annotated_channel_1.png",
    "dapi_channel_2.png",
]


class RnaFish3DTool(SmartTool):
    """Smart tool supplying the input path used by the analysis command."""

    def pre_run(self, identifier):
        """Set ``input_fpath`` to the content path of the item *identifier*."""
        item_fpath = self.input_dataset.item_content_abspath(identifier)
        self.base_command_props['input_fpath'] = item_fpath


def main():
    """Parse the command line arguments and run the tool on one item."""
    cli = parse_args()

    tool_context = RnaFish3DTool(cli.input_uri, cli.output_uri)
    with tool_context as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(cli.identifier)

示例#7
0
The trimmers are specified using the TRIMMOMATIC_TRIMMERS environment variable,
e.g.:

export TRIMMOMATIC_TRIMMERS="ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
"""

import os

from smarttoolbase import SmartTool, Command, parse_args

from dtoolcore.utils import generate_identifier

# Paired-end trimming command; the spacing inside the command string is kept
# exactly as it was.
# NOTE(review): the module docstring above describes TRIMMOMATIC_TRIMMERS,
# but this command runs sickle and has no {trimmers} placeholder -- confirm
# which tool this module is meant to wrap.
BASE_COMMANDS = [
    Command("sickle pe -t sanger -f {forward_read_fpath}"
            "  -r  {reverse_read_fpath}"
            "  -o  sickled_1.fq  -p sickled_2.fq"
            "   -s  trash.fq"),
]

# Files named by the ``-o`` and ``-p`` options of the command above.
OUTPUTS = ['sickled_1.fq', 'sickled_2.fq']


def find_paired_read(dataset, identifier):
    """Return what the 'pair_id' overlay of *dataset* maps *identifier* to."""
    pairs = dataset.get_overlay('pair_id')
    return pairs[identifier]


class TrimSeqsTrimmomatic(SmartTool):
示例#8
0
The trimmers are specified using the TRIMMOMATIC_TRIMMERS environment variable,
e.g.:

export TRIMMOMATIC_TRIMMERS="ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
"""

import os

from smarttoolbase import SmartTool, Command, parse_args

from dtoolcore.utils import generate_identifier

# Paired-end trimmomatic command; {trimmers} is the placeholder for the
# trimmer specification described in the module docstring.
BASE_COMMANDS = [
    Command("trimmomatic PE {forward_read_fpath} {reverse_read_fpath} "
            "read_1.fq.gz read_2.fq.gz {trimmers}"),
]

# The two output file names given on the trimmomatic command line.
OUTPUTS = ['read_1.fq.gz', 'read_2.fq.gz']


def find_paired_read(dataset, identifier):
    """Fetch the 'pair_id' overlay and return its entry for *identifier*."""
    return dataset.get_overlay('pair_id')[identifier]


class TrimSeqsTrimmomatic(SmartTool):
示例#9
0
"""Estimate transcript abundances using stringtie."""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Command template for the stringtie abundance estimation run. The
# placeholders match the keys that ``pre_run`` stores in
# ``base_command_props``.
BASE_COMMANDS = [
    Command("stringtie -e -B -G {merged_stringtie_gene_annotation_path} "
            "{input_fpath} -o OUT.gtf"),
]

# OUT.gtf is named by ``-o``; the .ctab files are listed as further outputs.
OUTPUTS = [
    "OUT.gtf",
    "e2t.ctab", "e_data.ctab",
    "i2t.ctab", "i_data.ctab",
    "t_data.ctab",
]


class StringtieEstimateTranscriptAbundancies(SmartTool):
    """Smart tool supplying the values used by the stringtie command."""

    def pre_run(self, identifier):
        """Store the command properties for the item *identifier*.

        Sets ``input_fpath`` to the absolute path of the item's content and
        ``merged_stringtie_gene_annotation_path`` to the value of the
        MERGED_STRINGTIE_GENE_ANNOTATION_PATH environment variable.

        Raises:
            KeyError: if MERGED_STRINGTIE_GENE_ANNOTATION_PATH is not set in
                the environment.
        """
        item_fpath = self.input_dataset.item_content_abspath(identifier)

        props = self.base_command_props
        props['input_fpath'] = item_fpath

        env_key = 'MERGED_STRINGTIE_GENE_ANNOTATION_PATH'
        props['merged_stringtie_gene_annotation_path'] = os.environ[env_key]
示例#10
0
"""Merge transcript using stringtie.

This script merges all the files in the input dataset, given any identifier in
the input dataset. It is up to the agent calling this script to ensure that
this smarttool is not called once for every item in the input dataset.
"""

import os

from smarttoolbase import SmartTool, Command, parse_args

# Command template for the stringtie merge run; {mergelist_fpath} is the
# placeholder for the file listing the inputs to merge.
BASE_COMMANDS = [
    Command("stringtie --merge -G {gene_annotation_path} {mergelist_fpath} "
            "-o stringtie.gtf"),
]

# File named by the ``-o`` option of the command above.
OUTPUTS = [
    "stringtie.gtf",
]


class MergeTranscriptsStringtie(SmartTool):
    def pre_run(self, identifier):

        mergelist_fpath = os.path.join(self.working_directory, "mergelist.txt")
        with open(mergelist_fpath, "w") as fh:
            for i in self.input_dataset.identifiers:
                item_fpath = self.input_dataset.item_content_abspath(i)
                fh.write("{}\n".format(item_fpath))

        self.base_command_props.update({'mergelist_fpath': mergelist_fpath})

        self.base_command_props['gene_annotation_path'] = os.environ[
示例#11
0
from smarttoolbase import SmartTool, Command, parse_args

# Prints the first four lines of the input file.
# NOTE(review): the second positional argument to Command ("stdout.txt") is
# presumably the file the command's stdout is written to -- confirm against
# smarttoolbase.Command.
BASE_COMMANDS = [
    Command('head -n 4 {input_fpath}', "stdout.txt"),
]

OUTPUTS = [
    "stdout.txt",
]


class SimpleExampleTool(SmartTool):
    """Smart tool supplying the input path used by the example command."""

    def pre_run(self, identifier):
        """Set ``input_fpath`` to the content path of the item *identifier*."""
        item_fpath = self.input_dataset.item_content_abspath(identifier)
        self.base_command_props['input_fpath'] = item_fpath


def main():
    """Parse the command line arguments and run the example tool once."""
    options = parse_args()
    input_uri, output_uri = options.input_uri, options.output_uri

    with SimpleExampleTool(input_uri, output_uri) as tool:
        tool.base_commands = BASE_COMMANDS
        tool.outputs = OUTPUTS
        tool(options.identifier)


# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()