Пример #1
0
    def handle(self, *args, **options):
        """Register the Bowtie 1.0.0 tool and its flow blueprint.

        If ``self.already_exists`` reports the tool, an INFO line is printed
        and ``True`` is returned without touching anything else.  Otherwise a
        ``FlowBlueprint`` and a ``StandaloneTool`` are built and saved, and a
        second ``FlowBlueprint`` describing the conditional bowtie-build step
        is constructed (it is not saved in this method); the method then
        returns ``None`` implicitly.

        ``args`` and ``options`` are accepted but not read.
        """
        name, version = 'Bowtie', '1.0.0'

        if self.already_exists(name, version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(name, version))
            return True

        # settings.ini is located relative to the directory of this module.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # Not used further below, but the section lookup is still performed.
        tool_settings = config["{0} {1}".format(name, version)]

        summary = (
            'Bowtie is an ultrafast, memory-efficient short read aligner. '
            'It aligns short DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp reads per hour. '
            'Bowtie indexes the genome with a Burrows-Wheeler index to keep its memory footprint small: '
            'typically about 2.2 GB for the human genome (2.9 GB for paired-end).'
        )
        flow_bp = FlowBlueprint(type='s', description=summary)
        flow_bp.save()

        tool = StandaloneTool(
            name=name,
            version=version,
            primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
            flow_bp=flow_bp,
        )
        tool.save()

        # File type registration is currently disabled.  The calls are kept
        # here for reference.
        #
        # the reference sequence
        #self.add_toolfiletype( tool, 'i', 'FASTA (nucleotide)', True, 'Nucleotide reference FASTA file' )

        #self.add_toolfiletype( tool, 'i', 'FASTQ (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTQ (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTQ (unpaired reads)', False )

        #self.add_toolfiletype( tool, 'i', 'FASTA (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTA (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTA (unpaired reads)', False )

        #self.add_toolfiletype( tool, 'o', 'FASTQ (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'o', 'FASTQ (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'o', 'FASTQ (unpaired reads)', False )

        # Conditional flow meant to run when bowtie-build is needed.  The
        # (admittedly crude) plan is to take the name of the reference input
        # FASTA file and look for a '.1.ebwt' suffix.
        bowtie_build_flow_bp = FlowBlueprint(
            type='s',
            description="Runs bowtie-build if an index file isn't detected.",
            conditional_code='',
        )
    def handle(self, *args, **options):
        """Create the database records for the Bowtie 1.0.0 tool.

        Returns ``True`` (after printing an INFO line) when
        ``self.already_exists`` says the tool is already registered.  In the
        other case it saves a ``FlowBlueprint`` and a ``StandaloneTool`` that
        points at it, builds an unsaved ``FlowBlueprint`` for the conditional
        bowtie-build step, and returns ``None`` implicitly.

        Neither ``args`` nor ``options`` is consulted.
        """
        tool_name = 'Bowtie'
        tool_version = '1.0.0'
        already_there = self.already_exists(tool_name, tool_version)

        if already_there:
            message = "INFO: tool {0} {1} already exists.  Skipping."
            print(message.format(tool_name, tool_version))
            return True

        # Read settings.ini, addressed relative to this module's directory.
        ini_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            '../../settings.ini',
        )
        parser = configparser.ConfigParser()
        parser.read(ini_path)

        # The section is fetched but not used further in this method.
        section_name = "{0} {1}".format(tool_name, tool_version)
        tool_settings = parser[section_name]

        flow_bp = FlowBlueprint(
            type='s',
            description=(
                'Bowtie is an ultrafast, memory-efficient short read aligner. '
                'It aligns short DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp reads per hour. '
                'Bowtie indexes the genome with a Burrows-Wheeler index to keep its memory footprint small: '
                'typically about 2.2 GB for the human genome (2.9 GB for paired-end).'
            ),
        )
        flow_bp.save()

        tool_fields = dict(
            name=tool_name,
            version=tool_version,
            primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
            flow_bp=flow_bp,
        )
        tool = StandaloneTool(**tool_fields)
        tool.save()

        # The file type registrations below are switched off for now and are
        # kept only for reference.
        #
        # the reference sequence
        #self.add_toolfiletype( tool, 'i', 'FASTA (nucleotide)', True, 'Nucleotide reference FASTA file' )

        #self.add_toolfiletype( tool, 'i', 'FASTQ (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTQ (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTQ (unpaired reads)', False )

        #self.add_toolfiletype( tool, 'i', 'FASTA (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTA (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'i', 'FASTA (unpaired reads)', False )

        #self.add_toolfiletype( tool, 'o', 'FASTQ (paired reads, left)', False )
        #self.add_toolfiletype( tool, 'o', 'FASTQ (paired reads, right)', False )
        #self.add_toolfiletype( tool, 'o', 'FASTQ (unpaired reads)', False )

        # A conditional flow intended to run if bowtie-build is needed.
        # Perhaps crude: the idea is to use the name of the reference input
        # FASTA file and look for a '.1.ebwt' suffix.
        bowtie_build_flow_bp = FlowBlueprint(
            type='s',
            description="Runs bowtie-build if an index file isn't detected.",
            conditional_code='',
        )
    def handle(self, *args, **options):
        """Register the Trinity in silico read normalization tool (r2013-02-25).

        Returns ``True`` after printing an INFO line when
        ``self.already_exists`` reports the tool.  Otherwise it saves, in
        order: a ``FlowBlueprint``, the ``StandaloneTool``, one
        ``CommandBlueprint`` whose ``exec_path`` comes from the
        ``normalization_script`` key of the "Trinity <version>" section of
        settings.ini, and one ``CommandBlueprintParam`` per command-line
        option.  Finally the usable and creatable file types are declared on
        the tool.  Returns ``None`` implicitly on this path.

        ``args`` and ``options`` are not read.
        """
        name = 'Trinity in silico read normalization'
        version = 'r2013-02-25'

        if self.already_exists(name, version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(name, version))
            return True

        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # The settings section is keyed on 'Trinity', not on this tool's name.
        tool_settings = config["{0} {1}".format('Trinity', version)]

        flow_bp = FlowBlueprint(
            type='s',
            description=(
                'Large RNA-Seq data sets, such as those exceeding 300M pairs, are best suited for in silico normalization prior to running Trinity, '
                'in order to reduce memory requirements and greatly improve upon runtimes. '
                'Before running the normalization, be sure that in the case of paired reads, '
                'the left read names end with suffix /1 and the right read names end with /2'
            ),
        )
        flow_bp.save()

        tool = StandaloneTool(
            name=name,
            version=version,
            primary_site='http://trinityrnaseq.sourceforge.net/trinity_insilico_normalization.html',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run Trinity read normalization',
            exec_path=tool_settings['normalization_script'],
        )
        command_bp.save()

        # Shared long description of the two list-file options.
        list_file_help = (
            'If you have read collections in different files you can use list files, '
            'where each line in a list file is the full path to an input file.  '
            'This saves you the time of combining them just so you can pass a single file for each direction.'
        )

        # One entry per command-line option, saved in this order.  The list
        # variants reuse positions 3 and 4 of --left / --right.
        param_specs = [
            dict(name='--seqType', prefix='--seqType ', position=1, is_optional=False,
                 short_desc='Type of reads: (fa, or fq)'),
            dict(name='--JM', prefix='--JM ', position=2, is_optional=False,
                 short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.'),
            dict(name='--left', prefix='--left ', position=3,
                 short_desc='Left reads'),
            dict(name='--right', prefix='--right ', position=4,
                 short_desc='Right reads'),
            dict(name='--single', prefix='--single ', position=5,
                 short_desc='Single (unpaired) reads'),
            dict(name='--left_list', prefix='--left_list ', position=3,
                 short_desc='Left reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--right_list', prefix='--right_list ', position=4,
                 short_desc='Right reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--pairs_together', prefix='--pairs_together ', position=6, has_no_value=True,
                 short_desc='Process paired reads by averaging stats between pairs and retaining linking info'),
            dict(name='--SS_lib_type', prefix='--SS_lib_type ', position=7,
                 short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)'),
            dict(name='--output', prefix='--output ', position=8,
                 short_desc="Name of directory for output (will be created if doesn't already exist.",
                 default_value='normalized_reads'),
            dict(name='--JELLY_CPU', prefix='--JELLY_CPU ', position=9,
                 short_desc='Number of threads for Jellyfish to use', default_value='2'),
            dict(name='--PARALLEL_STATS', prefix='--PARALLEL_STATS ', position=10, has_no_value=True,
                 short_desc='Generate read stats in parallel for paired reads (Figure 2X Inchworm memory requirement)'),
            dict(name='--KMER_SIZE', prefix='--KMER_SIZE ', position=11,
                 short_desc='K-mer size for de Bruijn graph construction', default_value='25'),
            dict(name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12,
                 short_desc='Minimum kmer coverage for catalog construction', default_value='1'),
            dict(name='--max_pct_stdev', prefix='--max_pct_stdev ', position=13,
                 short_desc='Maximum pct of mean for stdev of kmer coverage across read', default_value='100'),
        ]
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        # File type paired with the input option that accepts it.
        fastq_inputs = (
            ('FASTQ (Sanger, paired reads, left)', '--left'),
            ('FASTQ (Sanger, paired reads, right)', '--right'),
            ('FASTQ (Sanger, unpaired reads)', '--single'),
        )

        # TODO: parameter grouping needs to be applied here.
        for filetype, option in fastq_inputs:
            tool.can_use(filetype_name=filetype, via_command=command_bp, via_param=option)

        # TODO: parameter grouping needs to be applied here.
        # TODO: needs improving.  Trinity currently only supports defining the
        #  output at the directory level; the file names beneath it are created
        #  by convention.  Brian has been asked whether this can be added.
        for filetype, _ in fastq_inputs:
            tool.can_create(filetype_name=filetype, via_command=command_bp, via_param='--output')
Пример #4
0
    def handle(self, *args, **options):
        """Register the MUMmer ``show-coords`` 3.23 tool and its options.

        Returns ``True`` after printing an INFO line when
        ``self.already_exists`` reports the tool.  Otherwise it saves a
        ``FlowBlueprint``, the ``StandaloneTool``, a ``CommandBlueprint``
        whose ``exec_path`` is the ``show_coords_bin`` key of the
        "MUMmer <version>" section of settings.ini, and one
        ``CommandBlueprintParam`` per option, then declares the required
        input file type.  Returns ``None`` implicitly on this path.

        ``args`` and ``options`` are not read.
        """
        tool_name = 'show-coords'
        tool_version = '3.23'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        # show-coords ships with MUMmer, so its settings live in that section.
        tool_settings = settings["{0} {1}".format('MUMmer', tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://mummer.sourceforge.net/manual/#coords',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run show-coords',
            exec_path=tool_settings['show_coords_bin'],
        )
        command_bp.save()

        # USAGE: show-coords  [options]  <deltafile>
        #
        # One entry per option, saved in this order.
        param_specs = [
            dict(name='-b', prefix='-b ', has_no_value=True, position=1,
                 short_desc='Merges overlapping alignments',
                 long_desc='Merges overlapping alignments regardless of match dir or frame and does not display any identity information.'),
            dict(name='-B', prefix='-B ', has_no_value=True, position=2,
                 short_desc='Switch output to btab format'),
            dict(name='-c', prefix='-c ', has_no_value=True, position=3,
                 short_desc='Include percent coverage information in the output'),
            dict(name='-d', prefix='-d ', has_no_value=True, position=4,
                 short_desc='Display the alignment direction in the additional FRM columns (default for promer)'),
            dict(name='-H', prefix='-H ', has_no_value=True, position=5,
                 short_desc='Do not print the output header'),
            dict(name='-I', prefix='-I ', position=6,
                 short_desc='Set minimum percent identity to display'),
            dict(name='-k', prefix='-k ', has_no_value=True, position=7,
                 short_desc='Knockout 50/75 alignments',
                 long_desc='Knockout (do not display) alignments that overlap another alignment in a different frame by more than 50% of their length, AND have a smaller percent similarity or are less than 75% of the size of the other alignment (promer only)'),
            dict(name='-l', prefix='-l ', has_no_value=True, position=8,
                 short_desc='Include the sequence length information in the output'),
            dict(name='-L', prefix='-L ', position=9,
                 short_desc='Set minimum alignment length to display'),
            dict(name='-o', prefix='-o ', has_no_value=True, position=10,
                 short_desc='Annotate maximal alignments between two sequences',
                 long_desc='Annotate maximal alignments between two sequences, i.e. overlaps between reference and query sequences'),
            dict(name='-q', prefix='-q ', has_no_value=True, position=11,
                 short_desc='Sort output lines by query IDs and coordinates'),
            dict(name='-r', prefix='-r ', has_no_value=True, position=12,
                 short_desc='Sort output lines by reference IDs and coordinates'),
            dict(name='-T', prefix='-T ', has_no_value=True, position=13,
                 short_desc='Switch output to tab-delimited format'),
            # Positional argument: no prefix.  The description previously read
            # 'Input reference FASTA file', which contradicted both the usage
            # line and the 'MUMmer delta file' requirement declared below.
            dict(name='<deltafile>', prefix=None, position=14, is_optional=False,
                 short_desc='Input MUMmer delta file'),
        ]
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='MUMmer delta file', via_command=command_bp, via_param='<deltafile>')
Пример #5
0
    def handle(self, *args, **options):
        """Register the Prodigal 2.60 tool and its command-line options.

        Returns ``True`` after printing an INFO line when
        ``self.already_exists`` reports the tool.  Otherwise it saves a
        ``FlowBlueprint``, the ``StandaloneTool``, a ``CommandBlueprint``
        whose ``exec_path`` is the ``exec_path`` key of the
        "Prodigal 2.60" section of settings.ini, and one
        ``CommandBlueprintParam`` per option.  It then declares the needed
        input file type and the two output file types the tool can create.
        Returns ``None`` implicitly on this path.

        ``args`` and ``options`` are not read.
        """
        name, version = 'Prodigal', '2.60'

        if self.already_exists(name, version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(name, version))
            return True

        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        tool_settings = config["{0} {1}".format(name, version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=name,
            version=version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run prodigal',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()

        # One entry per option, saved in this order.
        param_specs = [
            dict(name='-a', prefix='-a ', position=1,
                 short_desc='Write protein translations to the selected file'),
            dict(name='-c', prefix='-c ', position=2, has_no_value=True,
                 short_desc='Closed ends.  Do not allow genes to run off edges'),
            dict(name='-d', prefix='-d ', position=3,
                 short_desc='Write nucleotide sequences of genes to the selected file'),
            ## TODO: limit choices to (gbk, gff, or sco)
            dict(name='-f', prefix='-f ', position=4, default_value='gbk',
                 short_desc='Select output format (gbk, gff, or sco).  Default is gbk'),
            dict(name='-g', prefix='-g ', position=5, default_value='11',
                 short_desc='Specify a translation table to use (default 11)'),
            dict(name='-i', prefix='-i ', position=6, is_optional=False,
                 short_desc='Specify input file (default reads from stdin).'),
            dict(name='-m', prefix='-m ', position=7, has_no_value=True,
                 short_desc='Treat runs of Ns as masked sequence and do not build genes across them'),
            dict(name='-n', prefix='-n ', position=8, has_no_value=True,
                 short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs'),
            dict(name='-o', prefix='-o ', position=9, is_optional=False,
                 short_desc='Specify output file'),
            dict(name='-p', prefix='-p ', position=10, default_value='single',
                 short_desc='Select procedure (single or meta).  Default is single.'),
            dict(name='-s', prefix='-s ', position=11,
                 short_desc='Write all potential genes (with scores) to the selected file'),
            dict(name='-t', prefix='-t ', position=12,
                 short_desc='Write or read the specified training file',
                 long_desc='Write a training file (if none exists); otherwise, read and use the specified training file'),
        ]
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='-i')

        # Each output type is tied to -o plus the matching -f format selector.
        output_types = (
            ('GenBank Flat File Format', '-f=gbk'),
            ('GFF3', '-f=gff'),
        )
        for filetype, format_selector in output_types:
            tool.can_create(filetype_name=filetype, via_command=command_bp,
                            via_params=['-o', format_selector])
Пример #6
0
    def handle(self, *args, **options):
        """Register the Trinity in silico read normalization tool (r2013-02-25).

        Returns ``True`` after printing an INFO line when
        ``self.already_exists`` reports the tool.  Otherwise it saves a
        ``FlowBlueprint`` and the ``StandaloneTool``, registers three FASTQ
        file types as input and the same three as output through
        ``self.add_toolfiletype``, saves a ``CommandBlueprint`` whose
        ``exec_path`` is the ``normalization_script`` key of the
        "Trinity <version>" section of settings.ini, and saves one
        ``CommandBlueprintParam`` per option.  Returns ``None`` implicitly on
        this path.

        ``args`` and ``options`` are not read.
        """
        name = 'Trinity in silico read normalization'
        version = 'r2013-02-25'

        if self.already_exists(name, version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(name, version))
            return True

        ini_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                '../../settings.ini')
        config = configparser.ConfigParser()
        config.read(ini_path)

        # The settings section is keyed on 'Trinity', not on this tool's name.
        tool_settings = config["{0} {1}".format('Trinity', version)]

        flow_bp = FlowBlueprint(
            type='s',
            description=(
                'Large RNA-Seq data sets, such as those exceeding 300M pairs, are best suited for in silico normalization prior to running Trinity, '
                'in order to reduce memory requirements and greatly improve upon runtimes. '
                'Before running the normalization, be sure that in the case of paired reads, '
                'the left read names end with suffix /1 and the right read names end with /2'
            ),
        )
        flow_bp.save()

        tool = StandaloneTool(
            name=name,
            version=version,
            primary_site='http://trinityrnaseq.sourceforge.net/trinity_insilico_normalization.html',
            flow_bp=flow_bp,
        )
        tool.save()

        # The same three FASTQ types are registered first as inputs ('i'),
        # then as outputs ('o').
        fastq_types = (
            'FASTQ (Sanger, paired reads, left)',
            'FASTQ (Sanger, paired reads, right)',
            'FASTQ (Sanger, unpaired reads)',
        )
        for direction in ('i', 'o'):
            for filetype in fastq_types:
                self.add_toolfiletype(tool, direction, filetype, False)

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run Trinity read normalization',
            exec_path=tool_settings['normalization_script'],
        )
        command_bp.save()

        # Shared long description of the two list-file options.
        list_file_help = (
            'If you have read collections in different files you can use list files, '
            'where each line in a list file is the full path to an input file.  '
            'This saves you the time of combining them just so you can pass a single file for each direction.'
        )

        # One entry per command-line option, saved in this order.  The list
        # variants reuse positions 3 and 4 of --left / --right.
        param_specs = [
            dict(name='--seqType', prefix='--seqType ', position=1, is_optional=False,
                 short_desc='Type of reads: (fa, or fq)'),
            dict(name='--JM', prefix='--JM ', position=2, is_optional=False,
                 short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.'),
            dict(name='--left', prefix='--left ', position=3,
                 short_desc='Left reads'),
            dict(name='--right', prefix='--right ', position=4,
                 short_desc='Right reads'),
            dict(name='--single', prefix='--single ', position=5,
                 short_desc='Single (unpaired) reads'),
            dict(name='--left_list', prefix='--left_list ', position=3,
                 short_desc='Left reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--right_list', prefix='--right_list ', position=4,
                 short_desc='Right reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--pairs_together', prefix='--pairs_together ', position=6, has_no_value=True,
                 short_desc='Process paired reads by averaging stats between pairs and retaining linking info'),
            dict(name='--SS_lib_type', prefix='--SS_lib_type ', position=7,
                 short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)'),
            dict(name='--output', prefix='--output ', position=8,
                 short_desc="Name of directory for output (will be created if doesn't already exist.",
                 default_value='normalized_reads'),
            dict(name='--JELLY_CPU', prefix='--JELLY_CPU ', position=9,
                 short_desc='Number of threads for Jellyfish to use', default_value='2'),
            dict(name='--PARALLEL_STATS', prefix='--PARALLEL_STATS ', position=10, has_no_value=True,
                 short_desc='Generate read stats in parallel for paired reads (Figure 2X Inchworm memory requirement)'),
            dict(name='--KMER_SIZE', prefix='--KMER_SIZE ', position=11,
                 short_desc='K-mer size for de Bruijn graph construction', default_value='25'),
            dict(name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12,
                 short_desc='Minimum kmer coverage for catalog construction', default_value='1'),
            dict(name='--max_pct_stdev', prefix='--max_pct_stdev ', position=13,
                 short_desc='Maximum pct of mean for stdev of kmer coverage across read', default_value='100'),
        ]
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()
Пример #7
0
    def handle(self, *args, **options):
        """Register the Trinity r2013-02-25 tool and its command-line options.

        Returns ``True`` after printing an INFO line when
        ``self.already_exists`` reports the tool.  Otherwise it saves a
        ``FlowBlueprint``, the ``StandaloneTool``, a ``CommandBlueprint``
        whose ``exec_path`` is the ``exec_path`` key of the
        "Trinity <version>" section of settings.ini, and one
        ``CommandBlueprintParam`` per option.  It then declares the output
        file type and the FASTQ/FASTA input file types.  Returns ``None``
        implicitly on this path.

        ``args`` and ``options`` are not read.
        """
        tool_name = 'Trinity'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        tool_settings = settings["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://trinityrnaseq.sourceforge.net/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run Trinity',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()

        # One entry per option, saved in this order.
        param_specs = [
            dict(name='--seqType', prefix='--seqType ', position=1, is_optional=False,
                 short_desc='Type of reads: (cfa, cfq, fa, or fq)'),
            dict(name='--JM', prefix='--JM ', position=2, is_optional=False,
                 short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.'),
            dict(name='--left', prefix='--left ', position=3,
                 short_desc='Left reads'),
            dict(name='--right', prefix='--right ', position=4,
                 short_desc='Right reads'),
            dict(name='--single', prefix='--single ', position=5,
                 short_desc='Single (unpaired) reads'),
            dict(name='--SS_lib_type', prefix='--SS_lib_type ', position=6,
                 short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)'),
            dict(name='--output', prefix='--output ', position=7,
                 short_desc="Name of directory for output (will be created if doesn't already exist.",
                 default_value='trinity_out_dir'),
            dict(name='--CPU', prefix='--CPU ', position=8,
                 short_desc='Number of CPUs to use', default_value='2'),
            dict(name='--min_contig_length', prefix='--min_contig_length ', position=9,
                 short_desc='Minimum assembled contig length to report', default_value='200'),
            dict(name='--jaccard_clip', prefix='--jaccard_clip ', position=10, has_no_value=True,
                 short_desc='Set if you have paired reads and expect high gene density with UTR overlap.  This is an expensive operation.'),
            dict(name='--no_cleanup', prefix='--no_cleanup ', position=11, has_no_value=True,
                 short_desc='Retain all intermediate input files'),

            ####################################################
            # Inchworm and K-mer counting-related options: #####
            dict(name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12,
                 short_desc='Min count for K-mers to be assembled by Inchworm', default_value='1'),

            ## Should later add the --no_run_quantifygraph option and process the rest via an iterator

            #####################################
            ###  Butterfly-related options:  ####
            dict(name='--max_number_of_paths_per_node', prefix='--max_number_of_paths_per_node ', position=13,
                 short_desc='Only most supported (N) paths are extended from node A->B, mitigating combinatoric path explorations',
                 default_value='10'),
            dict(name='--group_pairs_distance', prefix='--group_pairs_distance ', position=14,
                 short_desc='Maximum length expected between fragment pairs.  Reads outside this will be treated as single-end',
                 default_value='500'),
            dict(name='--path_reinforcement_distance', prefix='--path_reinforcement_distance ', position=15,
                 short_desc='Minimum overlap of reads with growing transcript path (default: PE: 75, SE: 25)'),
            dict(name='--no_triplet_lock', prefix='--no_triplet_lock ', position=16, has_no_value=True,
                 short_desc='Do not lock triplet-supported nodes'),
            dict(name='--bflyHeapSpaceMax', prefix='--bflyHeapSpaceMax ', position=17, default_value='20G',
                 short_desc='Java max heap space setting for butterfly'),
            dict(name='--bflyHeapSpaceInit', prefix='--bflyHeapSpaceInit ', position=18, default_value='1G',
                 short_desc='Java initial heap space settings for butterfly'),
            dict(name='--bflyGCThreads', prefix='--bflyGCThreads ', position=19,
                 short_desc='Threads for garbage collection'),
            dict(name='--bflyCPU', prefix='--bflyCPU ', position=20,
                 short_desc='CPUs to use.  Default will match --CPU value'),
            # Trinity's usage lists --bflyCalculateCPU as a bare switch, so it
            # is registered with has_no_value=True.  The description used to
            # read "805", a mistyped "80%".
            dict(name='--bflyCalculateCPU', prefix='--bflyCalculateCPU ', position=21, has_no_value=True,
                 short_desc='Calculate CPUs based on 80% of max_memory divided by bflyHeapSpaceMax'),
        ]
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        # TODO: needs improving.  Trinity currently only supports defining the
        #  output at the directory level; the file names beneath it are created
        #  by convention.  Brian has been asked whether this can be added.
        tool.creates(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='--output')

        # TODO: parameter grouping needs to be applied here.
        tool.can_use(filetype_name='FASTQ (Sanger, paired reads, left)', via_command=command_bp, via_param='--left')
        tool.can_use(filetype_name='FASTQ (Sanger, paired reads, right)', via_command=command_bp, via_param='--right')
        tool.can_use(filetype_name='FASTQ (Sanger, unpaired reads)', via_command=command_bp, via_param='--single')

        # TODO: parameter grouping needs to be applied here.
        tool.can_use(filetype_name='FASTA (paired reads, left)', via_command=command_bp, via_param='--left')
        tool.can_use(filetype_name='FASTA (paired reads, right)', via_command=command_bp, via_param='--right')
        tool.can_use(filetype_name='FASTA (unpaired reads)', via_command=command_bp, via_param='--single')
Пример #8
0
    def handle(self, *args, **options):
        """Create and save the database records describing Bowtie-build 1.0.0.

        If ``self.already_exists`` reports the tool as present, a notice is
        printed and ``True`` is returned without creating anything.  Otherwise
        a flow blueprint, the standalone tool, one command blueprint and its
        sixteen parameters are saved, the tool's input and output file types
        are declared, and the method falls off the end (returning ``None``).

        ``args`` and ``options`` are accepted but not used.
        """
        tool_name = 'Bowtie-build'
        tool_version = '1.0.0'

        if self.already_exists(tool_name, tool_version):
            notice = "INFO: tool {0} {1} already exists.  Skipping."
            print(notice.format(tool_name, tool_version))
            return True

        # settings.ini is looked up two directories above this module.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        settings = configparser.ConfigParser()
        settings.read(os.path.join(module_dir, '../../settings.ini'))

        # The section is keyed on 'Bowtie', not on this tool's own name.
        section_name = "{0} {1}".format('Bowtie', tool_version)
        tool_settings = settings[section_name]

        bowtie_description = (
            'Bowtie is an ultrafast, memory-efficient short read aligner. '
            'It aligns short DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp reads per hour. '
            'Bowtie indexes the genome with a Burrows-Wheeler index to keep its memory footprint small: '
            'typically about 2.2 GB for the human genome (2.9 GB for paired-end).'
        )
        flow_bp = FlowBlueprint(type='s', description=bowtie_description)
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Build an index for bowtie',
            exec_path=tool_settings['bowtie_build_bin'],
        )
        command_bp.save()

        # bowtie-build [options]* <reference_in> <ebwt_outfile_base>
        #
        # Each entry is (name, extra keyword arguments).  The order of the
        # entries defines the 1-based ``position`` of every parameter.
        param_specs = (
            ('-C', dict(has_no_value=True,
                        short_desc='Build a colorspace index')),
            ('-a', dict(has_no_value=True,
                        short_desc='Disable automatic -p/--bmax/--dcv memory-fitting')),
            ('-p', dict(has_no_value=True,
                        short_desc='Use packed strings internally; slower, uses less mem')),
            ('-B', dict(has_no_value=True,
                        short_desc='Build both letter- and colorspace indexes')),
            ('--bmax', dict(short_desc='Max bucket sz for blockwise suffix-array builder')),
            ('--bmaxdivn', dict(default_value='4',
                                short_desc='Max bucket sz as divisor of ref len')),
            ('--dcv', dict(default_value='1024',
                           short_desc='Diff-cover period for blockwise')),
            ('--nodc', dict(has_no_value=True,
                            short_desc='Disable diff-cover (algorithm becomes quadratic)')),
            ('-r', dict(has_no_value=True,
                        short_desc='Do not build .3/.4.ebwt (packed reference) portion')),
            ('-3', dict(has_no_value=True,
                        short_desc='Just build .3/.4.ebwt (packed reference) portion')),
            ('-o', dict(default_value='5',
                        short_desc='SA is sampled every 2^offRate BWT chars')),
            ('-t', dict(default_value='10',
                        short_desc='# of chars consumed in initial lookup')),
            ('--ntoa', dict(has_no_value=True,
                            short_desc='Convert Ns in reference to As')),
            ('--seed', dict(short_desc='Seed for random number generator')),
            ('<reference_in>', dict(is_optional=False,
                                    short_desc='Input reference FASTA file')),
            ('<ebwt_outfile_base>', dict(is_optional=False,
                                         short_desc='Path to the basename of the ebwt files to be created')),
        )

        for position, (param_name, extra) in enumerate(param_specs, start=1):
            # Dash-style options are prefixed with their own name plus a
            # space; the positional <...> arguments carry no prefix.
            if param_name.startswith('-'):
                prefix = param_name + ' '
            else:
                prefix = None
            CommandBlueprintParam(command=command_bp, name=param_name, prefix=prefix,
                                  position=position, **extra).save()

        tool.needs(filetype_name='FASTA (nucleotide)',
                   via_command=command_bp,
                   via_param='<reference_in>')
        tool.creates(filetype_name='Bowtie 1.0 index',
                     via_command=command_bp,
                     via_param='<ebwt_outfile_base>')
Пример #9
0
    def handle(self, *args, **options):
        """Create and save the database records describing NUCmer 3.23.

        If ``self.already_exists`` reports the tool as present, a notice is
        printed and ``True`` is returned without creating anything.  Otherwise
        a flow blueprint, the standalone tool, one command blueprint and its
        sixteen parameters are saved, both FASTA inputs are declared as
        needed, and the method falls off the end (returning ``None``).

        ``args`` and ``options`` are accepted but not used.
        """
        tool_name = 'NUCmer'
        tool_version = '3.23'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is looked up two directories above this module.
        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        # The section is keyed on 'MUMmer', not on this tool's own name.
        tool_settings = settings["{0} {1}".format('MUMmer', tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(name=tool_name,
                              version=tool_version,
                              primary_site='http://mummer.sourceforge.net/manual/#nucmer',
                              flow_bp=flow_bp)
        tool.save()

        command_bp = CommandBlueprint(parent=flow_bp,
                                      name='Run NUCmer',
                                      exec_path=tool_settings['nucmer_bin'])
        command_bp.save()

        # USAGE: nucmer  [options]  <Reference>  <Query>
        #
        # Each entry is (name, extra keyword arguments).  The order of the
        # entries defines the 1-based ``position`` of every parameter.
        param_specs = (
            ('--mum', dict(
                has_no_value=True,
                short_desc='Use anchor matches that are unique in both the reference and query')),
            ('--mumreference', dict(
                has_no_value=True,
                short_desc='Use anchor matches that are unique in the reference but not necessarily unique in the query')),
            ('-b', dict(
                default_value='200',
                short_desc='Alignment extension distance',
                long_desc='Distance an alignment extension will attempt to extend poor scoring regions before giving up')),
            ('-c', dict(
                default_value='65',
                short_desc='Minimum length of a cluster of matches')),
            ('--nodelta', dict(
                has_no_value=True,
                short_desc='Toggles off creation of delta file')),
            ('-D', dict(
                default_value='5',
                short_desc='Maximum diagonal difference between two adjacent anchors in a cluster')),
            ('-d', dict(
                default_value='0.12',
                short_desc='Maximum diagonal difference ratio',
                long_desc='Maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length ')),
            ('--noextend', dict(
                has_no_value=True,
                short_desc='Toggles off the cluster extension step')),
            ('--forward', dict(
                has_no_value=True,
                short_desc='Use only the forward strand of the Query sequences')),
            ('-g', dict(
                default_value='90',
                short_desc='Maximum gap between two adjacent matches in a cluster')),
            ('-l', dict(
                default_value='20',
                short_desc='Minimum length of a single match')),
            ('--nooptimize', dict(
                has_no_value=True,
                short_desc='Toggle off alignment score optimization',
                long_desc='Toggles off alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will backtrack to optimize the alignment score instead of terminating the alignment at the end of the sequence')),
            ('--reverse', dict(
                has_no_value=True,
                short_desc='Use only the reverse complement of the Query sequences')),
            # Fixed: the description used to say the flag *removes* shadowed
            # clusters, which is what the default --simplify does; passing
            # --nosimplify switches that removal off.
            ('--nosimplify', dict(
                has_no_value=True,
                short_desc='Toggle off removal of shadowed clusters',
                long_desc='Toggles off alignment simplification, i.e. the removal of shadowed clusters. Use this option when aligning a sequence to itself to look for repeats')),
            ('<reference_in>', dict(
                is_optional=False,
                short_desc='Input reference FASTA file')),
            ('<query_in>', dict(
                is_optional=False,
                short_desc='Input query FASTA file')),
        )

        for position, (param_name, extra) in enumerate(param_specs, start=1):
            # Dash-style options are prefixed with their own name plus a
            # space; the positional <...> arguments carry no prefix.
            prefix = param_name + ' ' if param_name.startswith('-') else None
            CommandBlueprintParam(command=command_bp, name=param_name, prefix=prefix,
                                  position=position, **extra).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<reference_in>')
        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<query_in>')
    def handle(self, *args, **options):
        """Create and save the database records describing Prodigal 2.60.

        If ``self.already_exists`` reports the tool as present, a notice is
        printed and ``True`` is returned without creating anything.  Otherwise
        a flow blueprint, the standalone tool, one command blueprint and its
        twelve parameters are saved, the FASTA input and the two possible
        output file types are declared, and the method falls off the end
        (returning ``None``).

        ``args`` and ``options`` are accepted but not used.
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            notice = "INFO: tool {0} {1} already exists.  Skipping."
            print(notice.format(tool_name, tool_version))
            return True

        # settings.ini is looked up two directories above this module.
        module_dir = os.path.abspath(os.path.dirname(__file__))
        settings = configparser.ConfigParser()
        settings.read(os.path.join(module_dir, '../../settings.ini'))

        section_name = "{0} {1}".format(tool_name, tool_version)
        tool_settings = settings[section_name]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            parent=flow_bp,
            name='Run prodigal',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()

        # Each entry is (name, extra keyword arguments).  The order of the
        # entries defines the 1-based ``position`` of every parameter.
        param_specs = (
            ('-a', dict(short_desc='Write protein translations to the selected file')),
            ('-c', dict(has_no_value=True,
                        short_desc='Closed ends.  Do not allow genes to run off edges')),
            ('-d', dict(short_desc='Write nucleotide sequences of genes to the selected file')),
            ## TODO: limit choices to (gbk, gff, or sco)
            ('-f', dict(default_value='gbk',
                        short_desc='Select output format (gbk, gff, or sco).  Default is gbk')),
            ('-g', dict(default_value='11',
                        short_desc='Specify a translation table to use (default 11)')),
            ('-i', dict(is_optional=False,
                        short_desc='Specify input file (default reads from stdin).')),
            ('-m', dict(has_no_value=True,
                        short_desc='Treat runs of Ns as masked sequence and do not build genes across them')),
            ('-n', dict(has_no_value=True,
                        short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs')),
            ('-o', dict(is_optional=False,
                        short_desc='Specify output file')),
            ('-p', dict(default_value='single',
                        short_desc='Select procedure (single or meta).  Default is single.')),
            ('-s', dict(short_desc='Write all potential genes (with scores) to the selected file')),
            ('-t', dict(short_desc='Write or read the specified training file',
                        long_desc='Write a training file (if none exists); otherwise, read and use the specified training file')),
        )

        for position, (param_name, extra) in enumerate(param_specs, start=1):
            # Every Prodigal option is prefixed with its own name plus a space.
            CommandBlueprintParam(command=command_bp, name=param_name,
                                  prefix=param_name + ' ', position=position,
                                  **extra).save()

        tool.needs(filetype_name='FASTA (nucleotide)',
                   via_command=command_bp,
                   via_param='-i')

        # Which output file type is produced depends on the value given to -f.
        output_variants = (
            ('GenBank Flat File Format', '-f=gbk'),
            ('GFF3', '-f=gff'),
        )
        for filetype_name, format_selector in output_variants:
            tool.can_create(filetype_name=filetype_name,
                            via_command=command_bp,
                            via_params=['-o', format_selector])
    def handle(self, *args, **options):
        tool_name = 'Trinity'
        tool_version = 'r2013-02-25'
        
        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version) )
            return True

        settings = configparser.ConfigParser()
        settings.read( os.path.join( os.path.abspath(os.path.dirname(__file__)), '../../settings.ini') )

        tool_settings = settings[ "{0} {1}".format(tool_name, tool_version) ]

        flow_bp = FlowBlueprint( type='s' )
        flow_bp.save()

        tool = StandaloneTool( name=tool_name, \
                               version=tool_version, \
                               primary_site='http://trinityrnaseq.sourceforge.net/', \
                               flow_bp=flow_bp )
        tool.save()

        self.add_toolfiletype( tool, 'i', 'FASTQ (Sanger, paired reads, left)', False )
        self.add_toolfiletype( tool, 'i', 'FASTQ (Sanger, paired reads, right)', False )
        self.add_toolfiletype( tool, 'i', 'FASTQ (Sanger, unpaired reads)', False )
        
        self.add_toolfiletype( tool, 'o', 'FASTA (nucleotide)', True )


        command_bp = CommandBlueprint( parent = flow_bp, \
                                       name = 'Run Trinity', \
                                       exec_path = tool_settings['exec_path'] )
        command_bp.save()


        CommandBlueprintParam( command=command_bp, name='--seqType', prefix='--seqType ', position=1, \
            is_optional=False, short_desc='Type of reads: (cfa, cfq, fa, or fq)' ).save()

        CommandBlueprintParam( command=command_bp, name='--JM', prefix='--JM ', position=2, \
            is_optional=False, short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.' ).save()

        CommandBlueprintParam( command=command_bp, name='--left', prefix='--left ', position=3, \
            short_desc='Left reads' ).save()

        CommandBlueprintParam( command=command_bp, name='--right', prefix='--right ', position=4, \
            short_desc='Right reads' ).save()

        CommandBlueprintParam( command=command_bp, name='--single', prefix='--single ', position=5, \
            short_desc='Single (unpaired) reads' ).save()

        CommandBlueprintParam( command=command_bp, name='--SS_lib_type', prefix='--SS_lib_type ', position=6, \
            short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)' ).save()

        CommandBlueprintParam( command=command_bp, name='--output', prefix='--output ', position=7, \
            short_desc='Name of directory for output (will be created if doesn\'t already exist.', \
            default_value='trinity_out_dir' ).save()

        CommandBlueprintParam( command=command_bp, name='--CPU', prefix='--CPU ', position=8, \
            short_desc='Number of CPUs to use', default_value='2' ).save()

        CommandBlueprintParam( command=command_bp, name='--min_contig_length', prefix='--min_contig_length ', \
            position=9, short_desc='Minimum assembled contig length to report', default_value='200' ).save()

        CommandBlueprintParam( command=command_bp, name='--jaccard_clip', prefix='--jaccard_clip ', position=10, \
            has_no_value=True, short_desc='Set if you have paired reads and expect high gene density with UTR overlap.  This is an expensive operation.' ).save()

        CommandBlueprintParam( command=command_bp, name='--no_cleanup', prefix='--no_cleanup ', position=11, \
            has_no_value=True, short_desc='Retain all intermediate input files' ).save()


        ####################################################
        # Inchworm and K-mer counting-related options: #####

        CommandBlueprintParam( command=command_bp, name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12, \
            short_desc='Min count for K-mers to be assembled by Inchworm', default_value='1' ).save()

        ## Should later add the --no_run_quantifygraph option and process the rest via an iterator

        #####################################
        ###  Butterfly-related options:  ####
        
        CommandBlueprintParam( command=command_bp, name='--max_number_of_paths_per_node', prefix='--max_number_of_paths_per_node ', \
            position=13, short_desc='Only most supported (N) paths are extended from node A->B, mitigating combinatoric path explorations', \
            default_value='10' ).save()
        
        CommandBlueprintParam( command=command_bp, name='--group_pairs_distance', prefix='--group_pairs_distance ', \
            position=14, short_desc='Maximum length expected between fragment pairs.  Reads outside this will be treated as single-end', \
            default_value='500' ).save()

        CommandBlueprintParam( command=command_bp, name='--path_reinforcement_distance', prefix='--path_reinforcement_distance ', \
            position=15, short_desc='Minimum overlap of reads with growing transcript path (default: PE: 75, SE: 25)' ).save()

        CommandBlueprintParam( command=command_bp, name='--no_triplet_lock', prefix='--no_triplet_lock ', position=16, \
            has_no_value=True, short_desc='Do not lock triplet-supported nodes' ).save()
        
        CommandBlueprintParam( command=command_bp, name='--bflyHeapSpaceMax', prefix='--bflyHeapSpaceMax ', position=17, \
            default_value='20G', short_desc='Java max heap space setting for butterfly' ).save()

        CommandBlueprintParam( command=command_bp, name='--bflyHeapSpaceInit', prefix='--bflyHeapSpaceInit ', position=18, \
            default_value='1G', short_desc='Java initial heap space settings for butterfly' ).save()
        
        CommandBlueprintParam( command=command_bp, name='--bflyGCThreads', prefix='--bflyGCThreads ', position=19, \
            short_desc='Threads for garbage collection' ).save()

        CommandBlueprintParam( command=command_bp, name='--bflyCPU', prefix='--bflyCPU ', position=20, \
            short_desc='CPUs to use.  Default will match --CPU value' ).save()

        CommandBlueprintParam( command=command_bp, name='--bflyCalculateCPU', prefix='--bflyCalculateCPU ', position=21, \
            short_desc='Calculate CPUs based on 805 of max_memory divided by bflyHeapSpaceMax' ).save()