Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Register GeneMark-ES 2.3e as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input and output
        file types are declared, and the method falls off the end (None).
        """
        tool_name = 'GeneMark-ES'
        tool_version = '2.3e'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the section name is "<tool name> <tool version>"
        section = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://topaz.gatech.edu',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run GeneMark-ES', exec_path=section['gm_es_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='--max_nnn', prefix='--max_nnn ', position=1, default_value='49',
                 short_desc='[number] number of unknown letters used to fill gaps'),
            dict(name='--min_contig', prefix='--min_contig ', position=2, default_value='20000',
                 short_desc='[number] minimum length of the contig',
                 long_desc='All contigs shorter then "min_contig" are excluded from training procedure.'),
            dict(name='--max_contig', prefix='--max_contig ', position=3, default_value='10000000',
                 short_desc='[number] maximum length of contig',
                 long_desc='Sequences longer then "max_contig" are split into shorter ones to avoid out of memory condition.'),
            dict(name='--BP', prefix='--BP ', position=4,
                 short_desc='switches off the branch point submodel and runs original ES algorithm (ON/OFF)'),
            dict(name='--ini_mod', prefix='--ini_mod ', position=5,
                 short_desc=''),
            # the input file is a bare positional argument, hence no prefix
            dict(name='<sequence_file_name>', prefix=None, position=6, is_optional=False,
                 short_desc='input sequence in FASTA format'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<sequence_file_name>')
        tool.can_create(filetype_name='GFF3', via_command=command_bp)
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Register geneid 1.4 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's FASTA input is
        declared, and the method falls off the end (None).
        """
        # These two values must match the [geneid 1.4] section in settings.ini
        tool_name = 'geneid'
        tool_version = '1.4'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        section = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s', name=tool_name)
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://www1.imim.es/software/geneid/index.html',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run geneid', exec_path=section['exec_path'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # Parameters for the tool.  geneid has many more options than are
        # registered here; this subset covers the common invocation:
        #
        #   geneid -3 -P <parameter_filename> <Sequence_filename> > some.out.gff3
        param_specs = (
            dict(name='-3', prefix='-3 ', position=1, has_no_value=True,
                 is_optional=False, short_desc='Use GFF3 format to print predictions'),
            dict(name='-P', prefix='-P ', position=2,
                 is_optional=False, short_desc='Parameter file to use'),
            # this has to be the last argument positionally
            dict(name='<sequence_filename>', prefix=None, position=3,
                 is_optional=False, short_desc='Input query FASTA file'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        # Input file type consumed by the tool
        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<sequence_filename>')
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Register EVM r20120625 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input file types
        are declared, and the method falls off the end (None).
        """
        tool_name = 'EVM'
        tool_version = 'r20120625'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the section name is "<tool name> <tool version>"
        section = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://sourceforge.net/projects/evidencemodeler',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run EVM', exec_path=section['evm_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='--genome', prefix='--genome ', position=1, is_optional=False,
                 short_desc='Genome sequence in FASTA format'),
            dict(name='--weights', prefix='--weights ', position=2, is_optional=False,
                 short_desc='Weights for evidence types file',
                 long_desc='The weights file is used to describe and score the types of each input.  More info here: http://evidencemodeler.sourceforge.net/#Preparing_inputs'),
            dict(name='--gene_predictions', prefix='--gene_predictions ', position=3, is_optional=False,
                 short_desc='Gene predictions GFF3 file',
                 long_desc='All input files which correspond to gene predictions (and not protein or transcript alignments) should be concatenated into this file.'),
            dict(name='--protein_alignments', prefix='--protein_alignments ', position=4, is_optional=True,
                 short_desc='Protein alignments in GFF3 format'),
            dict(name='--transcript_alignments', prefix='--transcript_alignments ', position=5, is_optional=True,
                 short_desc='Transcript alignments in GFF3 format'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='--genome')
        tool.needs(filetype_name='GFF3', via_command=command_bp, via_param='--gene_predictions')
Exemplo n.º 4
0
    def handle(self, *args, **options):
        """Register NUCmer 3.23 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input and output
        file types are declared, and the method falls off the end (None).
        """
        tool_name = 'NUCmer'
        tool_version = '3.23'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the settings section is keyed by 'MUMmer', not by this tool's own name
        section = config["{0} {1}".format('MUMmer', tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://mummer.sourceforge.net/manual/#nucmer',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run NUCmer', exec_path=section['nucmer_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # USAGE: nucmer  [options]  <Reference>  <Query>
        #
        # Entries are saved in the order listed; note that '-p' is listed
        # between --nooptimize and --reverse but carries position 17.
        param_specs = (
            # output streams, both registered at position 0
            dict(name='STDOUT', position=0),
            dict(name='STDERR', position=0),
            dict(name='--mum', prefix='--mum ', has_no_value=True, position=1,
                 short_desc='Use anchor matches that are unique in both the reference and query'),
            dict(name='--mumreference', prefix='--mumreference ', has_no_value=True, position=2,
                 short_desc='Use anchor matches that are unique in the reference but not necessarily unique in the query'),
            dict(name='-b', prefix='-b ', position=3, default_value='200',
                 short_desc='Alignment extension distance',
                 long_desc='Distance an alignment extension will attempt to extend poor scoring regions before giving up'),
            dict(name='-c', prefix='-c ', default_value='65', position=4,
                 short_desc='Minimum length of a cluster of matches'),
            dict(name='--nodelta', prefix='--nodelta ', has_no_value=True, position=5,
                 short_desc='Toggles off creation of delta file'),
            dict(name='-D', prefix='-D ', default_value='5', position=6,
                 short_desc='Maximum diagonal difference between two adjacent anchors in a cluster'),
            dict(name='-d', prefix='-d ', default_value='0.12', position=7,
                 short_desc='Maximum diagonal difference ratio',
                 long_desc='Maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length '),
            dict(name='--noextend', prefix='--noextend ', has_no_value=True, position=8,
                 short_desc='Toggles off the cluster extension step'),
            dict(name='--forward', prefix='--forward ', has_no_value=True, position=9,
                 short_desc='Use only the forward strand of the Query sequences'),
            dict(name='-g', prefix='-g ', default_value='90', position=10,
                 short_desc='Maximum gap between two adjacent matches in a cluster'),
            dict(name='-l', prefix='-l ', default_value='20', position=11,
                 short_desc='Minimum length of a single match'),
            dict(name='--nooptimize', prefix='--nooptimize ', has_no_value=True, position=12,
                 short_desc='Toggle off alignment score optimization',
                 long_desc='Toggles off alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will backtrack to optimize the alignment score instead of terminating the alignment at the end of the sequence'),
            # we make this one required just so that a tool can always look up the output file more easily
            dict(name='-p', prefix='-p ', default_value='out', position=17, is_optional=False,
                 short_desc='Sets the output file prefix, which can include the directory path'),
            dict(name='--reverse', prefix='--reverse ', has_no_value=True, position=13,
                 short_desc='Use only the reverse complement of the Query sequences'),
            dict(name='--nosimplify', prefix='--nosimplify ', has_no_value=True, position=14,
                 short_desc='Removes shadowed clusters',
                 long_desc='Simplify alignments by removing shadowed clusters. Turn this option off if aligning a sequence to itself to look for repeats'),
            # the two input files are bare positional arguments, hence no prefix
            dict(name='<reference_in>', prefix=None, position=15, is_optional=False,
                 short_desc='Input reference FASTA file'),
            dict(name='<query_in>', prefix=None, position=16, is_optional=False,
                 short_desc='Input query FASTA file'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        for fasta_param in ('<reference_in>', '<query_in>'):
            tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param=fasta_param)
        tool.creates(filetype_name='MUMmer delta file', via_command=command_bp, via_param='STDOUT')
Exemplo n.º 5
0
    def handle(self, *args, **options):
        """Register show-coords 3.23 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's delta-file input
        is declared, and the method falls off the end (None).
        """
        tool_name = 'show-coords'
        tool_version = '3.23'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the settings section is keyed by 'MUMmer', not by this tool's own name
        section = config["{0} {1}".format('MUMmer', tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://mummer.sourceforge.net/manual/#coords',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run show-coords', exec_path=section['show_coords_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # USAGE: show-coords  [options]  <deltafile>
        #
        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='-b', prefix='-b ', has_no_value=True, position=1,
                 short_desc='Merges overlapping alignments',
                 long_desc='Merges overlapping alignments regardless of match dir or frame and does not display any idenitity information.'),
            dict(name='-B', prefix='-B ', has_no_value=True, position=2,
                 short_desc='Switch output to btab format'),
            dict(name='-c', prefix='-c ', has_no_value=True, position=3,
                 short_desc='Include percent coverage information in the output'),
            dict(name='-d', prefix='-d ', has_no_value=True, position=4,
                 short_desc='Display the alignment direction in the additional FRM columns (default for promer)'),
            dict(name='-H', prefix='-H ', has_no_value=True, position=5,
                 short_desc='Do not print the output header'),
            dict(name='-I', prefix='-I ', position=6,
                 short_desc='Set minimum percent identity to display'),
            dict(name='-k', prefix='-k ', has_no_value=True, position=7,
                 short_desc='Knockout 50/75 alignments',
                 long_desc='Knockout (do not display) alignments that overlap another alignment in a different frame by more than 50% of their length, AND have a smaller percent similarity or are less than 75% of the size of the other alignment (promer only)'),
            dict(name='-l', prefix='-l ', has_no_value=True, position=8,
                 short_desc='Include the sequence length information in the output'),
            dict(name='-L', prefix='-L ', position=9,
                 short_desc='Set minimum alignment length to display'),
            dict(name='-o', prefix='-o ', has_no_value=True, position=10,
                 short_desc='Annotate maximal alignments between two sequences',
                 long_desc='Annotate maximal alignments between two sequences, i.e. overlaps between reference and query sequences'),
            dict(name='-q', prefix='-q ', has_no_value=True, position=11,
                 short_desc='Sort output lines by query IDs and coordinates'),
            dict(name='-r', prefix='-r ', has_no_value=True, position=12,
                 short_desc='Sort output lines by reference IDs and coordinates'),
            dict(name='-T', prefix='-T ', has_no_value=True, position=13,
                 short_desc='Switch output to tab-delimited format'),
            # The positional input is the delta file declared via tool.needs()
            # below, so describe it as such rather than as a FASTA file.
            dict(name='<deltafile>', prefix=None, position=14, is_optional=False,
                 short_desc='Input MUMmer delta file'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='MUMmer delta file', via_command=command_bp, via_param='<deltafile>')
Exemplo n.º 6
0
    def handle(self, *args, **options):
        """Register GeneMarkS 4.6b as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input and output
        file types are declared, and the method falls off the end (None).

        Every parameter is given a distinct ``position``; the positional
        input file carries the highest one.
        """
        tool_name = 'GeneMarkS'
        tool_version = '4.6b'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the section name is "<tool name> <tool version>"
        section = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://topaz.gatech.edu',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run GeneMarkS', exec_path=section['gm_s_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='--name', prefix='--name ', position=1, default_value='GeneMark_hmm.mod',
                 short_desc='<string> name of output model file generated for GeneMark.hmm'),
            # NOTE(review): --combine and --gm are described like switches yet
            # carry file-name defaults - confirm against the GeneMarkS usage text.
            dict(name='--combine', prefix='--combine ', position=2, default_value='GeneMark_hmm_combined.mod',
                 short_desc='combine GeneMarkS generated and Heuristic model parameters into one integrated model'),
            dict(name='--gm', prefix='--gm ', position=3, default_value='GeneMark.mat',
                 short_desc='generate model file for GeneMark'),
            dict(name='--species', prefix='--species ', position=4,
                 short_desc='<string> name of a species in a model file'),
            dict(name='--clean', prefix='--clean ', position=5, has_no_value=True,
                 short_desc='delete all temporary files'),
            dict(name='--order', prefix='--order ', position=6, default_value='2',
                 short_desc='<number> markov chain order. (default: 2; supported in range: >= 0)'),
            dict(name='--gcode', prefix='--gcode ', position=7, default_value='11',
                 short_desc='<number> genetic code. default: 11; supported: 11, 4 and 1)'),
            dict(name='--shape', prefix='--shape ', position=8, default_value='partial',
                 short_desc='<string> sequence organization (default: partial; supported: linear, circular and partial)'),
            dict(name='--motif', prefix='--motif ', position=9, default_value='1',
                 long_desc='<number> iterative search for a sequence motif associated with CDS start. (default: 1; supported: 1 <true> and 0 <false>)'),
            dict(name='--width', prefix='--width ', position=10, default_value='6',
                 short_desc='<number> motif width (default: 6; supported in range: >= 3)'),
            dict(name='--prestart', prefix='--prestart ', position=11, default_value='26',
                 long_desc='<number> length of sequence upstream of translation initiation site that presumably includes the motif (default: 26; supported in range: >= 0)'),
            dict(name='--identity', prefix='--identity ', position=12, default_value='0.99',
                 long_desc='<number> identity level assigned for termination of iterations (default: 0.99; supported in range: >=0 and <= 1)'),
            dict(name='--matrix', prefix='--matrix ', position=13, default_value='10',
                 short_desc='<number> maximum number of iterations (default: 10; supported in range: >= 1)'),
            # NOTE(review): this description is identical to --offover's and
            # looks copy-pasted - confirm the intended --fixmotif text.
            dict(name='--fixmotif', prefix='--fixmotif ', position=14, has_no_value=True,
                 long_desc='prohibits gene overlap (if not specified: overlaps are allowed)'),
            dict(name='--offover', prefix='--offover ', position=15, has_no_value=True,
                 short_desc='prohibits gene overlap (if not specified: overlaps are allowed)'),
            dict(name='--strand', prefix='--strand ', position=16, default_value='both',
                 short_desc='<string> sequence strand to predict genes in (default: both; supported: direct, reverse and both )'),
            # --prok used to share position 16 with --strand; it and every
            # later parameter are shifted up by one so positions stay unique.
            dict(name='--prok', prefix='--prok ', position=17, has_no_value=True,
                 short_desc='same as:  --combine --clean --gm',
                 long_desc='to run program on prokaryotic sequence or phage with building models for both GeneMark and GeneMark.hmm'),
            dict(name='--euk', prefix='--euk ', position=18, has_no_value=True,
                 short_desc='same as:  --offover --gcode 1 --clean --fixmotif --prestart 6 --width 12 --order 4 --gm',
                 long_desc='to run program on eukaryotic intron-less sequence (i.e. low eukaryote)'),
            dict(name='--viral', prefix='--viral ', position=19, has_no_value=True,
                 short_desc='same as:  --combine --gcode 1 --clean --fixmotif --prestart 6 --width 12 --gm',
                 long_desc='to run program on a eukaryotic viral genome'),
            dict(name='--par', prefix='--par ', position=20,
                 short_desc='<file name> custom parameters for GeneMarkS',
                 long_desc='default is selected based on gcode value: par_<gcode>.default'),
            dict(name='--imod', prefix='--imod ', position=21,
                 short_desc='<file name> custom initiation model for GeneMarkS',
                 long_desc='default: heuristic model derived from GC composition of input sequence'),
            dict(name='--test', prefix='--test ', position=22, has_no_value=True,
                 short_desc='installation test'),
            dict(name='--verbose', prefix='--verbose ', position=23, has_no_value=True,
                 short_desc='prints stderr'),
            # the input file is a bare positional argument, hence no prefix
            dict(name='<sequence_file_name>', prefix=None, position=24, is_optional=False,
                 short_desc='input sequence in FASTA format'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<sequence_file_name>')
        tool.can_create(filetype_name='GFF3', via_command=command_bp)
Exemplo n.º 7
0
    def handle(self, *args, **options):
        """Register Prodigal 2.60 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input and output
        file types are declared, and the method falls off the end (None).
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the section name is "<tool name> <tool version>"
        section = config["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s', name=tool_name)
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Run prodigal', exec_path=section['exec_path'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='-a', prefix='-a ', position=1,
                 short_desc='Write protein translations to the selected file'),
            dict(name='-c', prefix='-c ', position=2, has_no_value=True,
                 short_desc='Closed ends.  Do not allow genes to run off edges'),
            dict(name='-d', prefix='-d ', position=3,
                 short_desc='Write nucleotide sequences of genes to the selected file'),
            # TODO: limit choices to (gbk, gff, or sco)
            dict(name='-f', prefix='-f ', position=4, default_value='gbk',
                 short_desc='Select output format (gbk, gff, or sco).  Default is gbk'),
            dict(name='-g', prefix='-g ', position=5, default_value='11',
                 short_desc='Specify a translation table to use (default 11)'),
            dict(name='-i', prefix='-i ', position=6, is_optional=False,
                 short_desc='Specify input file (default reads from stdin).'),
            dict(name='-m', prefix='-m ', position=7, has_no_value=True,
                 short_desc='Treat runs of Ns as masked sequence and do not build genes across them'),
            dict(name='-n', prefix='-n ', position=8, has_no_value=True,
                 short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs'),
            dict(name='-o', prefix='-o ', position=9, is_optional=False,
                 short_desc='Specify output file'),
            dict(name='-p', prefix='-p ', position=10, default_value='single',
                 short_desc='Select procedure (single or meta).  Default is single.'),
            dict(name='-s', prefix='-s ', position=11,
                 short_desc='Write all potential genes (with scores) to the selected file'),
            dict(name='-t', prefix='-t ', position=12,
                 short_desc='Write or read the specified training file',
                 long_desc='Write a training file (if none exists); otherwise, read and use the specified training file'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='-i')

        # each output file type is tied to '-o' plus a specific '-f' value
        output_types = (
            ('GenBank Flat File Format', ['-o', '-f=gbk']),
            ('GFF3', ['-o', '-f=gff']),
        )
        for filetype, params in output_types:
            tool.can_create(filetype_name=filetype, via_command=command_bp, via_params=params)
Exemplo n.º 8
0
    def handle(self, *args, **options):
        """Register Bowtie-build 1.0.0 as a standalone tool with one command.

        Returns True immediately when ``self.already_exists`` reports the
        tool/version pair.  Otherwise the flow blueprint, tool, command
        blueprint and its parameters are saved, the tool's input and output
        file types are declared, and the method falls off the end (None).
        """
        tool_name = 'Bowtie-build'
        tool_version = '1.0.0'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # settings.ini is resolved relative to the directory holding this module
        module_dir = os.path.abspath(os.path.dirname(__file__))
        config = configparser.ConfigParser()
        config.read(os.path.join(module_dir, '../../settings.ini'))

        # the settings section is keyed by 'Bowtie', not by this tool's own name
        section = config["{0} {1}".format('Bowtie', tool_version)]

        flow_bp = FlowBlueprint(
            type='s',
            description='Bowtie is an ultrafast, memory-efficient short read aligner. It aligns short DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to keep its memory footprint small: typically about 2.2 GB for the human genome (2.9 GB for paired-end).',
        )
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Build an index for bowtie', exec_path=section['bowtie_build_bin'])
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # bowtie-build [options]* <reference_in> <ebwt_outfile_base>
        #
        # One entry per command-line parameter, saved in the order listed.
        param_specs = (
            dict(name='-C', prefix='-C ', has_no_value=True, position=1,
                 short_desc='Build a colorspace index'),
            dict(name='-a', prefix='-a ', has_no_value=True, position=2,
                 short_desc='Disable automatic -p/--bmax/--dcv memory-fitting'),
            dict(name='-p', prefix='-p ', has_no_value=True, position=3,
                 short_desc='Use packed strings internally; slower, uses less mem'),
            dict(name='-B', prefix='-B ', has_no_value=True, position=4,
                 short_desc='Build both letter- and colorspace indexes'),
            dict(name='--bmax', prefix='--bmax ', position=5,
                 short_desc='Max bucket sz for blockwise suffix-array builder'),
            dict(name='--bmaxdivn', prefix='--bmaxdivn ', position=6, default_value='4',
                 short_desc='Max bucket sz as divisor of ref len'),
            dict(name='--dcv', prefix='--dcv ', position=7, default_value='1024',
                 short_desc='Diff-cover period for blockwise'),
            dict(name='--nodc', prefix='--nodc ', has_no_value=True, position=8,
                 short_desc='Disable diff-cover (algorithm becomes quadratic)'),
            dict(name='-r', prefix='-r ', has_no_value=True, position=9,
                 short_desc='Do not build .3/.4.ebwt (packed reference) portion'),
            dict(name='-3', prefix='-3 ', has_no_value=True, position=10,
                 short_desc='Just build .3/.4.ebwt (packed reference) portion'),
            dict(name='-o', prefix='-o ', position=11, default_value='5',
                 short_desc='SA is sampled every 2^offRate BWT chars'),
            dict(name='-t', prefix='-t ', position=12, default_value='10',
                 short_desc='# of chars consumed in initial lookup'),
            dict(name='--ntoa', prefix='--ntoa ', has_no_value=True, position=13,
                 short_desc='Convert Ns in reference to As'),
            dict(name='--seed', prefix='--seed ', position=14,
                 short_desc='Seed for random number generator'),
            # the two trailing arguments are bare positionals, hence no prefix
            dict(name='<reference_in>', prefix=None, position=15, is_optional=False,
                 short_desc='Input reference FASTA file'),
            dict(name='<ebwt_outfile_base>', prefix=None, position=16, is_optional=False,
                 short_desc='Path to the basename of the ebwt files to be created'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<reference_in>')
        tool.creates(filetype_name='Bowtie 1.0 index', via_command=command_bp, via_param='<ebwt_outfile_base>')