Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Register the GeneMark-ES 2.3e tool, its command and its parameters.

        Returns True without touching anything when ``self.already_exists``
        reports the tool/version pair as present; otherwise falls off the end
        (returns None) after saving a FlowBlueprint, a StandaloneTool, one
        CommandBlueprint with six parameters, and declaring the tool's input
        and output file types.

        The executable path is read from the ``gm_es_bin`` key of the
        ``"GeneMark-ES 2.3e"`` section of ``settings.ini``, looked up two
        directory levels above this file's directory.  ``ConfigParser.read``
        silently ignores a missing file, so a missing file, section or key
        surfaces as a ``KeyError``.
        """
        tool_name = 'GeneMark-ES'
        tool_version = '2.3e'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # The ini section is named "<tool name> <tool version>".
        here = os.path.abspath(os.path.dirname(__file__))
        settings = configparser.ConfigParser()
        settings.read(os.path.join(here, '../../settings.ini'))
        tool_settings = settings[' '.join((tool_name, tool_version))]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://topaz.gatech.edu',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            name='Run GeneMark-ES',
            exec_path=tool_settings['gm_es_bin'],
        )
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, listed in command-line order;
        # each entry's position is its 1-based index in this list.
        param_specs = [
            dict(name='--max_nnn', prefix='--max_nnn ', default_value='49',
                 short_desc='[number] number of unknown letters used to fill gaps'),
            dict(name='--min_contig', prefix='--min_contig ', default_value='20000',
                 short_desc='[number] minimum length of the contig',
                 long_desc='All contigs shorter then "min_contig" are excluded from training procedure.'),
            dict(name='--max_contig', prefix='--max_contig ', default_value='10000000',
                 short_desc='[number] maximum length of contig',
                 long_desc='Sequences longer then "max_contig" are split into shorter ones to avoid out of memory condition.'),
            dict(name='--BP', prefix='--BP ',
                 short_desc='switches off the branch point submodel and runs original ES algorithm (ON/OFF)'),
            dict(name='--ini_mod', prefix='--ini_mod ', short_desc=''),
            # The input file is positional (no prefix) and mandatory.
            dict(name='<sequence_file_name>', prefix=None, is_optional=False,
                 short_desc='input sequence in FASTA format'),
        ]
        for position, spec in enumerate(param_specs, 1):
            CommandBlueprintParam(command=command_bp, position=position, **spec).save()

        tool.needs(
            filetype_name='FASTA (nucleotide)',
            via_command=command_bp,
            via_param='<sequence_file_name>',
        )
        tool.can_create(filetype_name='GFF3', via_command=command_bp)
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Register the GeneMarkS 4.6b tool, its command and its parameters.

        Returns True without touching anything when ``self.already_exists``
        reports the tool/version pair as present; otherwise falls off the end
        (returns None) after saving a FlowBlueprint, a StandaloneTool, one
        CommandBlueprint with its parameters, and declaring the tool's input
        and output file types.

        The executable path is read from the ``gm_s_bin`` key of the
        ``"GeneMarkS 4.6b"`` section of ``settings.ini``, looked up two
        directory levels above this file's directory.  ``ConfigParser.read``
        silently ignores a missing file, so a missing file, section or key
        surfaces as a ``KeyError``.
        """
        tool_name = 'GeneMarkS'
        tool_version = '4.6b'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        # The ini section is named "<tool name> <tool version>".
        tool_settings = settings["{0} {1}".format(tool_name, tool_version)]

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://topaz.gatech.edu',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            name='Run GeneMarkS',
            exec_path=tool_settings['gm_s_bin'],
        )
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line parameter, in command-line order.  The
        # position of each parameter is derived from its index in this list so
        # that two parameters can never be assigned the same position (the
        # hand-numbered version gave both --strand and --prok position 16).
        param_specs = [
            # -- output options --
            dict(name='--name', prefix='--name ', default_value='GeneMark_hmm.mod',
                 short_desc='<string> name of output model file generated for GeneMark.hmm'),
            dict(name='--combine', prefix='--combine ', default_value='GeneMark_hmm_combined.mod',
                 short_desc='combine GeneMarkS generated and Heuristic model parameters into one integrated model'),
            dict(name='--gm', prefix='--gm ', default_value='GeneMark.mat',
                 short_desc='generate model file for GeneMark'),
            dict(name='--species', prefix='--species ',
                 short_desc='<string> name of a species in a model file'),
            dict(name='--clean', prefix='--clean ', has_no_value=True,
                 short_desc='delete all temporary files'),

            # -- run options --
            dict(name='--order', prefix='--order ', default_value='2',
                 short_desc='<number> markov chain order. (default: 2; supported in range: >= 0)'),
            dict(name='--gcode', prefix='--gcode ', default_value='11',
                 short_desc='<number> genetic code. default: 11; supported: 11, 4 and 1)'),
            dict(name='--shape', prefix='--shape ', default_value='partial',
                 short_desc='<string> sequence organization (default: partial; supported: linear, circular and partial)'),
            dict(name='--motif', prefix='--motif ', default_value='1',
                 long_desc='<number> iterative search for a sequence motif associated with CDS start. (default: 1; supported: 1 <true> and 0 <false>)'),
            dict(name='--width', prefix='--width ', default_value='6',
                 short_desc='<number> motif width (default: 6; supported in range: >= 3)'),
            dict(name='--prestart', prefix='--prestart ', default_value='26',
                 long_desc='<number> length of sequence upstream of translation initiation site that presumably includes the motif (default: 26; supported in range: >= 0)'),
            dict(name='--identity', prefix='--identity ', default_value='0.99',
                 long_desc='<number> identity level assigned for termination of iterations (default: 0.99; supported in range: >=0 and <= 1)'),
            # NOTE(review): the description reads like an iteration cap;
            # confirm that '--matrix' is the flag name GeneMarkS accepts for it.
            dict(name='--matrix', prefix='--matrix ', default_value='10',
                 short_desc='<number> maximum number of iterations (default: 10; supported in range: >= 1)'),
            # This description used to be a verbatim copy of the --offover
            # text below, which describes a different option.
            # NOTE(review): wording follows the GeneMarkS usage text from
            # memory; verify against the tool's own help output.
            dict(name='--fixmotif', prefix='--fixmotif ', has_no_value=True,
                 long_desc='the motif is located at a fixed position with regard to the start; motif could overlap start codon (if this option is on, it changes the meaning of --prestart option which in this case will define the distance from start codon to motif start)'),
            dict(name='--offover', prefix='--offover ', has_no_value=True,
                 short_desc='prohibits gene overlap (if not specified: overlaps are allowed)'),
            dict(name='--strand', prefix='--strand ', default_value='both',
                 short_desc='<string> sequence strand to predict genes in (default: both; supported: direct, reverse and both )'),

            # -- presets that bundle several of the options above --
            dict(name='--prok', prefix='--prok ', has_no_value=True,
                 short_desc='same as:  --combine --clean --gm',
                 long_desc='to run program on prokaryotic sequence or phage with building models for both GeneMark and GeneMark.hmm'),
            dict(name='--euk', prefix='--euk ', has_no_value=True,
                 short_desc='same as:  --offover --gcode 1 --clean --fixmotif --prestart 6 --width 12 --order 4 --gm',
                 long_desc='to run program on eukaryotic intron-less sequence (i.e. low eukaryote)'),
            dict(name='--viral', prefix='--viral ', has_no_value=True,
                 short_desc='same as:  --combine --gcode 1 --clean --fixmotif --prestart 6 --width 12 --gm',
                 long_desc='to run program on a eukaryotic viral genome'),

            # -- custom model inputs and diagnostics --
            dict(name='--par', prefix='--par ',
                 short_desc='<file name> custom parameters for GeneMarkS',
                 long_desc='default is selected based on gcode value: par_<gcode>.default'),
            dict(name='--imod', prefix='--imod ',
                 short_desc='<file name> custom initiation model for GeneMarkS',
                 long_desc='default: heuristic model derived from GC composition of input sequence'),
            dict(name='--test', prefix='--test ', has_no_value=True,
                 short_desc='installation test'),
            dict(name='--verbose', prefix='--verbose ', has_no_value=True,
                 short_desc='prints stderr'),

            # The input file is positional (no prefix), mandatory, and last.
            dict(name='<sequence_file_name>', prefix=None, is_optional=False,
                 short_desc='input sequence in FASTA format'),
        ]
        for position, spec in enumerate(param_specs, 1):
            CommandBlueprintParam(command=command_bp, position=position, **spec).save()

        tool.needs(
            filetype_name='FASTA (nucleotide)',
            via_command=command_bp,
            via_param='<sequence_file_name>',
        )
        tool.can_create(filetype_name='GFF3', via_command=command_bp)
    def handle(self, *args, **options):
        """Register the Trinity in silico read normalization tool (r2013-02-25).

        Returns True without touching anything when ``self.already_exists``
        reports the tool/version pair as present; otherwise falls off the end
        (returns None) after saving a FlowBlueprint, a StandaloneTool, one
        CommandBlueprint with its parameters, and declaring the file types the
        tool can use and create.

        The script path is read from the ``normalization_script`` key of the
        ``"Trinity r2013-02-25"`` section of ``settings.ini`` (note: the
        section is keyed on 'Trinity', not on this tool's full name), looked up
        two directory levels above this file's directory.  A missing file,
        section or key surfaces as a ``KeyError``.
        """
        tool_name = 'Trinity in silico read normalization'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        here = os.path.abspath(os.path.dirname(__file__))
        settings = configparser.ConfigParser()
        settings.read(os.path.join(here, '../../settings.ini'))
        tool_settings = settings[' '.join(('Trinity', tool_version))]

        flow_description = (
            'Large RNA-Seq data sets, such as those exceeding 300M pairs, are best '
            'suited for in silico normalization prior to running Trinity, in order '
            'to reduce memory requirements and greatly improve upon runtimes. '
            'Before running the normalization, be sure that in the case of paired '
            'reads, the left read names end with suffix /1 and the right read names '
            'end with /2'
        )
        flow_bp = FlowBlueprint(type='s', description=flow_description)
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://trinityrnaseq.sourceforge.net/trinity_insilico_normalization.html',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            name='Run Trinity read normalization',
            exec_path=tool_settings['normalization_script'],
        )
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # Shared long description of the two "*_list" parameters.
        list_file_help = (
            'If you have read collections in different files you can use list '
            'files, where each line in a list file is the full path to an input '
            'file.  This saves you the time of combining them just so you can pass '
            'a single file for each direction.'
        )

        # Parameters are saved in the order listed.  Positions are explicit
        # because --left_list/--right_list reuse the positions of
        # --left/--right (presumably as alternatives to them -- see the
        # parameter-grouping TODO below; confirm before renumbering).
        param_specs = (
            dict(name='--seqType', prefix='--seqType ', position=1, is_optional=False,
                 short_desc='Type of reads: (fa, or fq)'),
            dict(name='--JM', prefix='--JM ', position=2, is_optional=False,
                 short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.'),
            dict(name='--left', prefix='--left ', position=3,
                 short_desc='Left reads'),
            dict(name='--right', prefix='--right ', position=4,
                 short_desc='Right reads'),
            dict(name='--single', prefix='--single ', position=5,
                 short_desc='Single (unpaired) reads'),
            dict(name='--left_list', prefix='--left_list ', position=3,
                 short_desc='Left reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--right_list', prefix='--right_list ', position=4,
                 short_desc='Right reads, if using a list file.  One file path per line',
                 long_desc=list_file_help),
            dict(name='--pairs_together', prefix='--pairs_together ', position=6, has_no_value=True,
                 short_desc='Process paired reads by averaging stats between pairs and retaining linking info'),
            dict(name='--SS_lib_type', prefix='--SS_lib_type ', position=7,
                 short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)'),
            dict(name='--output', prefix='--output ', position=8,
                 short_desc="Name of directory for output (will be created if doesn't already exist.",
                 default_value='normalized_reads'),
            dict(name='--JELLY_CPU', prefix='--JELLY_CPU ', position=9,
                 short_desc='Number of threads for Jellyfish to use', default_value='2'),
            dict(name='--PARALLEL_STATS', prefix='--PARALLEL_STATS ', position=10, has_no_value=True,
                 short_desc='Generate read stats in parallel for paired reads (Figure 2X Inchworm memory requirement)'),
            dict(name='--KMER_SIZE', prefix='--KMER_SIZE ', position=11,
                 short_desc='K-mer size for de Bruijn graph construction', default_value='25'),
            dict(name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12,
                 short_desc='Minimum kmer coverage for catalog construction', default_value='1'),
            dict(name='--max_pct_stdev', prefix='--max_pct_stdev ', position=13,
                 short_desc='Maximum pct of mean for stdev of kmer coverage across read', default_value='100'),
        )
        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        # TODO: parameter grouping needs to be applied here.
        usable_inputs = (
            ('FASTQ (Sanger, paired reads, left)', '--left'),
            ('FASTQ (Sanger, paired reads, right)', '--right'),
            ('FASTQ (Sanger, unpaired reads)', '--single'),
        )
        for filetype, flag in usable_inputs:
            tool.can_use(filetype_name=filetype, via_command=command_bp, via_param=flag)

        # TODO: parameter grouping needs to be applied here.
        # TODO: needs improving.  Unfortunately, Trinity currently only supports output definition
        #  at the directory level, and the file names under that are created by convention.
        #  I've written Brian to see if I can add this
        creatable_outputs = (
            'FASTQ (Sanger, paired reads, left)',
            'FASTQ (Sanger, paired reads, right)',
            'FASTQ (Sanger, unpaired reads)',
        )
        for filetype in creatable_outputs:
            tool.can_create(filetype_name=filetype, via_command=command_bp, via_param='--output')
Exemplo n.º 4
0
    def handle(self, *args, **options):
        """Register the Prodigal 2.60 tool, its command and its parameters.

        Returns True without touching anything when ``self.already_exists``
        reports the tool/version pair as present; otherwise falls off the end
        (returns None) after saving a FlowBlueprint, a StandaloneTool, one
        CommandBlueprint with twelve parameters, and declaring the tool's
        input and output file types.

        The executable path is read from the ``exec_path`` key of the
        ``"Prodigal 2.60"`` section of ``settings.ini``, looked up two
        directory levels above this file's directory.  A missing file, section
        or key surfaces as a ``KeyError``.
        """
        tool_name = 'Prodigal'
        tool_version = '2.60'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        ini_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            '../../settings.ini',
        )
        settings = configparser.ConfigParser()
        settings.read(ini_path)

        # The ini section is named "<tool name> <tool version>".
        section_name = ' '.join((tool_name, tool_version))
        tool_settings = settings[section_name]

        flow_bp = FlowBlueprint(type='s', name=tool_name)
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='https://code.google.com/p/prodigal/',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(
            name='Run prodigal',
            exec_path=tool_settings['exec_path'],
        )
        command_bp.save()
        command_bp.parents.add(flow_bp)

        def add_param(**fields):
            # Build one parameter bound to this command and save it right away.
            CommandBlueprintParam(command=command_bp, **fields).save()

        add_param(name='-a', prefix='-a ', position=1,
                  short_desc='Write protein translations to the selected file')
        add_param(name='-c', prefix='-c ', position=2, has_no_value=True,
                  short_desc='Closed ends.  Do not allow genes to run off edges')
        add_param(name='-d', prefix='-d ', position=3,
                  short_desc='Write nucleotide sequences of genes to the selected file')

        ## TODO: limit choices to (gbk, gff, or sco)
        add_param(name='-f', prefix='-f ', position=4, default_value='gbk',
                  short_desc='Select output format (gbk, gff, or sco).  Default is gbk')

        add_param(name='-g', prefix='-g ', position=5, default_value='11',
                  short_desc='Specify a translation table to use (default 11)')
        add_param(name='-i', prefix='-i ', position=6, is_optional=False,
                  short_desc='Specify input file (default reads from stdin).')
        add_param(name='-m', prefix='-m ', position=7, has_no_value=True,
                  short_desc='Treat runs of Ns as masked sequence and do not build genes across them')
        add_param(name='-n', prefix='-n ', position=8, has_no_value=True,
                  short_desc='Bypass the Shine-Dalgarno trainer and force the program to scan for motifs')
        add_param(name='-o', prefix='-o ', position=9, is_optional=False,
                  short_desc='Specify output file')
        add_param(name='-p', prefix='-p ', position=10, default_value='single',
                  short_desc='Select procedure (single or meta).  Default is single.')
        add_param(name='-s', prefix='-s ', position=11,
                  short_desc='Write all potential genes (with scores) to the selected file')
        add_param(name='-t', prefix='-t ', position=12,
                  short_desc='Write or read the specified training file',
                  long_desc='Write a training file (if none exists); otherwise, read and use the specified training file')

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='-i')

        # Each output type is tied to -o plus a specific -f value.
        outputs_by_format = (
            ('GenBank Flat File Format', '-f=gbk'),
            ('GFF3', '-f=gff'),
        )
        for filetype, format_selector in outputs_by_format:
            tool.can_create(filetype_name=filetype, via_command=command_bp,
                            via_params=['-o', format_selector])