Exemplo n.º 1
0
    def handle(self, *args, **options):
        """Register the NUCmer 3.23 tool, its command blueprint and its parameters.

        Nothing is created when ``self.already_exists`` reports that this
        tool/version pair is already present.  Otherwise the executable path is
        read from the ``MUMmer 3.23`` section of ``settings.ini`` (resolved two
        directories above this file); then a flow blueprint, a standalone tool,
        a command blueprint and one ``CommandBlueprintParam`` per supported
        option are saved, followed by the tool's ``needs``/``creates`` links.

        Args:
            *args: Unused.
            **options: Unused.

        Returns:
            ``True`` when the tool already exists and registration is skipped;
            ``None`` after a fresh registration.

        Raises:
            KeyError: If ``settings.ini`` has no ``MUMmer 3.23`` section or that
                section has no ``nucmer_bin`` entry.
        """
        tool_name = 'NUCmer'
        tool_version = '3.23'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # ConfigParser.read() silently skips a missing file, so an absent
        # settings.ini surfaces as a KeyError on the section lookup below.
        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        # The nucmer binary is configured under the MUMmer section, not under the tool name.
        tool_settings = settings["{0} {1}".format('MUMmer', tool_version)]

        # Resolve the executable before anything is saved so that a missing
        # entry cannot leave a half-registered tool behind.
        nucmer_path = tool_settings['nucmer_bin']

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(name=tool_name,
                              version=tool_version,
                              primary_site='http://mummer.sourceforge.net/manual/#nucmer',
                              flow_bp=flow_bp)
        tool.save()

        command_bp = CommandBlueprint(name='Run NUCmer', exec_path=nucmer_path)
        command_bp.save()
        command_bp.parents.add(flow_bp)

        def add_param(name, **fields):
            """Create and save one parameter attached to ``command_bp``."""
            CommandBlueprintParam(command=command_bp, name=name, **fields).save()

        # USAGE: nucmer  [options]  <Reference>  <Query>

        add_param('STDOUT', position=0)
        add_param('STDERR', position=0)

        add_param('--mum', prefix='--mum ', has_no_value=True, position=1,
                  short_desc='Use anchor matches that are unique in both the reference and query')

        add_param('--mumreference', prefix='--mumreference ', has_no_value=True, position=2,
                  short_desc='Use anchor matches that are unique in the reference but not necessarily unique in the query')

        add_param('-b', prefix='-b ', position=3, default_value='200',
                  short_desc='Alignment extension distance',
                  long_desc='Distance an alignment extension will attempt to extend poor scoring regions before giving up')

        add_param('-c', prefix='-c ', default_value='65', position=4,
                  short_desc='Minimum length of a cluster of matches')

        add_param('--nodelta', prefix='--nodelta ', has_no_value=True, position=5,
                  short_desc='Toggles off creation of delta file')

        add_param('-D', prefix='-D ', default_value='5', position=6,
                  short_desc='Maximum diagonal difference between two adjacent anchors in a cluster')

        add_param('-d', prefix='-d ', default_value='0.12', position=7,
                  short_desc='Maximum diagonal difference ratio',
                  long_desc='Maximum diagonal difference between two adjacent anchors in a cluster as a differential fraction of the gap length ')

        add_param('--noextend', prefix='--noextend ', has_no_value=True, position=8,
                  short_desc='Toggles off the cluster extension step')

        add_param('--forward', prefix='--forward ', has_no_value=True, position=9,
                  short_desc='Use only the forward strand of the Query sequences')

        add_param('-g', prefix='-g ', default_value='90', position=10,
                  short_desc='Maximum gap between two adjacent matches in a cluster')

        add_param('-l', prefix='-l ', default_value='20', position=11,
                  short_desc='Minimum length of a single match')

        add_param('--nooptimize', prefix='--nooptimize ', has_no_value=True, position=12,
                  short_desc='Toggle off alignment score optimization',
                  long_desc='Toggles off alignment score optimization, i.e. if an alignment extension reaches the end of a sequence, it will backtrack to optimize the alignment score instead of terminating the alignment at the end of the sequence')

        # we make this one required just so that a tool can always look up the output file more easily
        add_param('-p', prefix='-p ', default_value='out', position=17, is_optional=False,
                  short_desc='Sets the output file prefix, which can include the directory path')

        add_param('--reverse', prefix='--reverse ', has_no_value=True, position=13,
                  short_desc='Use only the reverse complement of the Query sequences')

        # The flag switches simplification OFF, so the short description is
        # worded like the other --no* toggles above.
        add_param('--nosimplify', prefix='--nosimplify ', has_no_value=True, position=14,
                  short_desc='Toggles off removal of shadowed clusters',
                  long_desc='Simplify alignments by removing shadowed clusters. Turn this option off if aligning a sequence to itself to look for repeats')

        add_param('<reference_in>', prefix=None, position=15, is_optional=False,
                  short_desc='Input reference FASTA file')

        add_param('<query_in>', prefix=None, position=16, is_optional=False,
                  short_desc='Input query FASTA file')

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<reference_in>')
        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<query_in>')
        # NOTE(review): nucmer normally writes its delta output to '<prefix>.delta'
        # (see -p above) rather than to standard output -- confirm that STDOUT is
        # the intended carrier for this file type.
        tool.creates(filetype_name='MUMmer delta file', via_command=command_bp, via_param='STDOUT')
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Register the Trinity r2013-02-25 tool, its command blueprint and parameters.

        Nothing is created when ``self.already_exists`` reports that this
        tool/version pair is already present.  Otherwise the executable path is
        read from the ``Trinity r2013-02-25`` section of ``settings.ini``
        (resolved two directories above this file); then a flow blueprint, a
        standalone tool, a command blueprint and one ``CommandBlueprintParam``
        per supported option are saved, followed by the tool's
        ``creates``/``can_use`` links.

        Args:
            *args: Unused.
            **options: Unused.

        Returns:
            ``True`` when the tool already exists and registration is skipped;
            ``None`` after a fresh registration.

        Raises:
            KeyError: If ``settings.ini`` has no ``Trinity r2013-02-25`` section
                or that section has no ``exec_path`` entry.
        """
        tool_name = 'Trinity'
        tool_version = 'r2013-02-25'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # ConfigParser.read() silently skips a missing file, so an absent
        # settings.ini surfaces as a KeyError on the section lookup below.
        settings = configparser.ConfigParser()
        settings.read(os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini'))

        tool_settings = settings["{0} {1}".format(tool_name, tool_version)]

        # Resolve the executable before anything is saved so that a missing
        # entry cannot leave a half-registered tool behind.
        trinity_path = tool_settings['exec_path']

        flow_bp = FlowBlueprint(type='s')
        flow_bp.save()

        tool = StandaloneTool(name=tool_name,
                              version=tool_version,
                              primary_site='http://trinityrnaseq.sourceforge.net/',
                              flow_bp=flow_bp)
        tool.save()

        command_bp = CommandBlueprint(name='Run Trinity', exec_path=trinity_path)
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # One entry per command-line option, saved in this order.
        param_specs = (
            dict(name='--seqType', prefix='--seqType ', position=1, is_optional=False,
                 short_desc='Type of reads: (cfa, cfq, fa, or fq)'),
            dict(name='--JM', prefix='--JM ', position=2, is_optional=False,
                 short_desc='Number of GB of system memory to use for k-mer counting by jellyfish (eg. 10G).  Include the G character.'),
            dict(name='--left', prefix='--left ', position=3,
                 short_desc='Left reads'),
            dict(name='--right', prefix='--right ', position=4,
                 short_desc='Right reads'),
            dict(name='--single', prefix='--single ', position=5,
                 short_desc='Single (unpaired) reads'),
            dict(name='--SS_lib_type', prefix='--SS_lib_type ', position=6,
                 short_desc='Strand-specific RNA-Seq read orientation.  if paired: RF or FR, if single: F or R.  (dUTP method = RF)'),
            dict(name='--output', prefix='--output ', position=7,
                 short_desc='Name of directory for output (will be created if doesn\'t already exist.',
                 default_value='trinity_out_dir'),
            dict(name='--CPU', prefix='--CPU ', position=8,
                 short_desc='Number of CPUs to use', default_value='2'),
            dict(name='--min_contig_length', prefix='--min_contig_length ', position=9,
                 short_desc='Minimum assembled contig length to report', default_value='200'),
            dict(name='--jaccard_clip', prefix='--jaccard_clip ', position=10, has_no_value=True,
                 short_desc='Set if you have paired reads and expect high gene density with UTR overlap.  This is an expensive operation.'),
            dict(name='--no_cleanup', prefix='--no_cleanup ', position=11, has_no_value=True,
                 short_desc='Retain all intermediate input files'),

            # Inchworm and K-mer counting-related options.
            dict(name='--min_kmer_cov', prefix='--min_kmer_cov ', position=12,
                 short_desc='Min count for K-mers to be assembled by Inchworm', default_value='1'),

            # Should later add the --no_run_quantifygraph option and process the rest via an iterator

            # Butterfly-related options.
            dict(name='--max_number_of_paths_per_node', prefix='--max_number_of_paths_per_node ', position=13,
                 short_desc='Only most supported (N) paths are extended from node A->B, mitigating combinatoric path explorations',
                 default_value='10'),
            dict(name='--group_pairs_distance', prefix='--group_pairs_distance ', position=14,
                 short_desc='Maximum length expected between fragment pairs.  Reads outside this will be treated as single-end',
                 default_value='500'),
            dict(name='--path_reinforcement_distance', prefix='--path_reinforcement_distance ', position=15,
                 short_desc='Minimum overlap of reads with growing transcript path (default: PE: 75, SE: 25)'),
            dict(name='--no_triplet_lock', prefix='--no_triplet_lock ', position=16, has_no_value=True,
                 short_desc='Do not lock triplet-supported nodes'),
            dict(name='--bflyHeapSpaceMax', prefix='--bflyHeapSpaceMax ', position=17, default_value='20G',
                 short_desc='Java max heap space setting for butterfly'),
            dict(name='--bflyHeapSpaceInit', prefix='--bflyHeapSpaceInit ', position=18, default_value='1G',
                 short_desc='Java initial heap space settings for butterfly'),
            dict(name='--bflyGCThreads', prefix='--bflyGCThreads ', position=19,
                 short_desc='Threads for garbage collection'),
            dict(name='--bflyCPU', prefix='--bflyCPU ', position=20,
                 short_desc='CPUs to use.  Default will match --CPU value'),
            # --bflyCalculateCPU is a bare switch in Trinity's usage text, so it
            # is registered as value-less; the description previously read
            # "805" where "80%" was meant.
            dict(name='--bflyCalculateCPU', prefix='--bflyCalculateCPU ', position=21, has_no_value=True,
                 short_desc='Calculate CPUs based on 80% of max_memory divided by bflyHeapSpaceMax'),
        )

        for spec in param_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        # TODO: needs improving.  Unfortunately, Trinity currently only supports output definition
        #  at the directory level, and the file names under that are created by convention.
        #  I've written to Brian to see if this can be added.
        tool.creates(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='--output')

        # TODO: parameter grouping needs to be applied here.
        tool.can_use(filetype_name='FASTQ (Sanger, paired reads, left)', via_command=command_bp, via_param='--left')
        tool.can_use(filetype_name='FASTQ (Sanger, paired reads, right)', via_command=command_bp, via_param='--right')
        tool.can_use(filetype_name='FASTQ (Sanger, unpaired reads)', via_command=command_bp, via_param='--single')

        # TODO: parameter grouping needs to be applied here.
        tool.can_use(filetype_name='FASTA (paired reads, left)', via_command=command_bp, via_param='--left')
        tool.can_use(filetype_name='FASTA (paired reads, right)', via_command=command_bp, via_param='--right')
        tool.can_use(filetype_name='FASTA (unpaired reads)', via_command=command_bp, via_param='--single')
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Register the Bowtie-build 1.0.0 tool, its command blueprint and parameters.

        Nothing is created when ``self.already_exists`` reports that this
        tool/version pair is already present.  Otherwise the executable path is
        read from the ``Bowtie 1.0.0`` section of ``settings.ini`` (resolved two
        directories above this file); then a flow blueprint, a standalone tool,
        a command blueprint and one ``CommandBlueprintParam`` per supported
        option are saved, followed by the tool's ``needs``/``creates`` links.

        Args:
            *args: Unused.
            **options: Unused.

        Returns:
            ``True`` when the tool already exists and registration is skipped;
            ``None`` after a fresh registration.

        Raises:
            KeyError: If ``settings.ini`` has no ``Bowtie 1.0.0`` section or
                that section has no ``bowtie_build_bin`` entry.
        """
        tool_name = 'Bowtie-build'
        tool_version = '1.0.0'

        if self.already_exists(tool_name, tool_version):
            print("INFO: tool {0} {1} already exists.  Skipping.".format(tool_name, tool_version))
            return True

        # ConfigParser.read() silently skips a missing file, so an absent
        # settings.ini surfaces as a KeyError on the section lookup below.
        config_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../../settings.ini')
        settings = configparser.ConfigParser()
        settings.read(config_path)

        # The bowtie-build binary is configured under the Bowtie section, not under the tool name.
        tool_settings = settings["{0} {1}".format('Bowtie', tool_version)]

        # Resolve the executable before anything is saved so that a missing
        # entry cannot leave a half-registered tool behind.
        bowtie_build_path = tool_settings['bowtie_build_bin']

        flow_bp = FlowBlueprint(
            type='s',
            description='Bowtie is an ultrafast, memory-efficient short read aligner. It aligns short DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to keep its memory footprint small: typically about 2.2 GB for the human genome (2.9 GB for paired-end).',
        )
        flow_bp.save()

        tool = StandaloneTool(
            name=tool_name,
            version=tool_version,
            primary_site='http://bowtie-bio.sourceforge.net/index.shtml',
            flow_bp=flow_bp,
        )
        tool.save()

        command_bp = CommandBlueprint(name='Build an index for bowtie', exec_path=bowtie_build_path)
        command_bp.save()
        command_bp.parents.add(flow_bp)

        # bowtie-build [options]* <reference_in> <ebwt_outfile_base>
        # One entry per command-line argument, saved in this order.
        option_specs = [
            {'name': '-C', 'prefix': '-C ', 'has_no_value': True, 'position': 1,
             'short_desc': 'Build a colorspace index'},
            {'name': '-a', 'prefix': '-a ', 'has_no_value': True, 'position': 2,
             'short_desc': 'Disable automatic -p/--bmax/--dcv memory-fitting'},
            {'name': '-p', 'prefix': '-p ', 'has_no_value': True, 'position': 3,
             'short_desc': 'Use packed strings internally; slower, uses less mem'},
            {'name': '-B', 'prefix': '-B ', 'has_no_value': True, 'position': 4,
             'short_desc': 'Build both letter- and colorspace indexes'},
            {'name': '--bmax', 'prefix': '--bmax ', 'position': 5,
             'short_desc': 'Max bucket sz for blockwise suffix-array builder'},
            {'name': '--bmaxdivn', 'prefix': '--bmaxdivn ', 'position': 6, 'default_value': '4',
             'short_desc': 'Max bucket sz as divisor of ref len'},
            {'name': '--dcv', 'prefix': '--dcv ', 'position': 7, 'default_value': '1024',
             'short_desc': 'Diff-cover period for blockwise'},
            {'name': '--nodc', 'prefix': '--nodc ', 'has_no_value': True, 'position': 8,
             'short_desc': 'Disable diff-cover (algorithm becomes quadratic)'},
            {'name': '-r', 'prefix': '-r ', 'has_no_value': True, 'position': 9,
             'short_desc': 'Do not build .3/.4.ebwt (packed reference) portion'},
            {'name': '-3', 'prefix': '-3 ', 'has_no_value': True, 'position': 10,
             'short_desc': 'Just build .3/.4.ebwt (packed reference) portion'},
            {'name': '-o', 'prefix': '-o ', 'position': 11, 'default_value': '5',
             'short_desc': 'SA is sampled every 2^offRate BWT chars'},
            {'name': '-t', 'prefix': '-t ', 'position': 12, 'default_value': '10',
             'short_desc': '# of chars consumed in initial lookup'},
            {'name': '--ntoa', 'prefix': '--ntoa ', 'has_no_value': True, 'position': 13,
             'short_desc': 'Convert Ns in reference to As'},
            {'name': '--seed', 'prefix': '--seed ', 'position': 14,
             'short_desc': 'Seed for random number generator'},
            # Positional arguments carry no prefix and are mandatory.
            {'name': '<reference_in>', 'prefix': None, 'position': 15, 'is_optional': False,
             'short_desc': 'Input reference FASTA file'},
            {'name': '<ebwt_outfile_base>', 'prefix': None, 'position': 16, 'is_optional': False,
             'short_desc': 'Path to the basename of the ebwt files to be created'},
        ]

        for spec in option_specs:
            CommandBlueprintParam(command=command_bp, **spec).save()

        tool.needs(filetype_name='FASTA (nucleotide)', via_command=command_bp, via_param='<reference_in>')
        tool.creates(filetype_name='Bowtie 1.0 index', via_command=command_bp, via_param='<ebwt_outfile_base>')